diff --git a/backend/package-lock.json b/backend/package-lock.json index 207f0f50..d6de9b3a 100644 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -18,8 +18,12 @@ "dotenv": "^17.2.3", "fastify": "^5.6.1", "form-data": "^4.0.4", + "html2canvas": "^1.4.1", "js-yaml": "^4.1.0", + "jspdf": "^3.0.3", + "p-queue": "^9.0.0", "prisma": "^6.17.0", + "tiktoken": "^1.0.22", "zod": "^4.1.12" }, "devDependencies": { @@ -32,6 +36,15 @@ "typescript": "^5.9.3" } }, + "node_modules/@babel/runtime": { + "version": "7.28.4", + "resolved": "https://registry.npmmirror.com/@babel/runtime/-/runtime-7.28.4.tgz", + "integrity": "sha512-Q/N6JNWvIvPnLDvjlE1OUBLPQHH6l3CltCEsHIujp45zQUSSh8K+gHnaEX45yAT1nyngnINhvWtzN+Nb9D8RAQ==", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, "node_modules/@cspotcode/source-map-support": { "version": "0.8.1", "resolved": "https://registry.npmmirror.com/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", @@ -861,6 +874,26 @@ "undici-types": "~7.14.0" } }, + "node_modules/@types/pako": { + "version": "2.0.4", + "resolved": "https://registry.npmmirror.com/@types/pako/-/pako-2.0.4.tgz", + "integrity": "sha512-VWDCbrLeVXJM9fihYodcLiIv0ku+AlOa/TQ1SvYOaBuyrSKgEcro95LJyIsJ4vSo6BXIxOKxiJAat04CmST9Fw==", + "license": "MIT" + }, + "node_modules/@types/raf": { + "version": "3.4.3", + "resolved": "https://registry.npmmirror.com/@types/raf/-/raf-3.4.3.tgz", + "integrity": "sha512-c4YAvMedbPZ5tEyxzQdMoOhhJ4RD3rngZIdwC2/qDN3d7JpEhB6fiBRKVY1lg5B7Wk+uPBjn5f39j1/2MY1oOw==", + "license": "MIT", + "optional": true + }, + "node_modules/@types/trusted-types": { + "version": "2.0.7", + "resolved": "https://registry.npmmirror.com/@types/trusted-types/-/trusted-types-2.0.7.tgz", + "integrity": "sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==", + "license": "MIT", + "optional": true + }, "node_modules/abstract-logging": { "version": "2.0.1", "resolved": 
"https://registry.npmmirror.com/abstract-logging/-/abstract-logging-2.0.1.tgz", @@ -1008,6 +1041,15 @@ "dev": true, "license": "MIT" }, + "node_modules/base64-arraybuffer": { + "version": "1.0.2", + "resolved": "https://registry.npmmirror.com/base64-arraybuffer/-/base64-arraybuffer-1.0.2.tgz", + "integrity": "sha512-I3yl4r9QB5ZRY3XuJVEPfc2XhZO6YweFPI+UovAzn+8/hb3oJ6lnysaFcjVpkCPfVWFUDvoZ8kmVDP7WyRtYtQ==", + "license": "MIT", + "engines": { + "node": ">= 0.6.0" + } + }, "node_modules/binary-extensions": { "version": "2.3.0", "resolved": "https://registry.npmmirror.com/binary-extensions/-/binary-extensions-2.3.0.tgz", @@ -1104,6 +1146,26 @@ "node": ">= 0.4" } }, + "node_modules/canvg": { + "version": "3.0.11", + "resolved": "https://registry.npmmirror.com/canvg/-/canvg-3.0.11.tgz", + "integrity": "sha512-5ON+q7jCTgMp9cjpu4Jo6XbvfYwSB2Ow3kzHKfIyJfaCAOHLbdKPQqGKgfED/R5B+3TFFfe8pegYA+b423SRyA==", + "license": "MIT", + "optional": true, + "dependencies": { + "@babel/runtime": "^7.12.5", + "@types/raf": "^3.4.0", + "core-js": "^3.8.3", + "raf": "^3.4.1", + "regenerator-runtime": "^0.13.7", + "rgbcolor": "^1.0.1", + "stackblur-canvas": "^2.0.0", + "svg-pathdata": "^6.0.3" + }, + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/chokidar": { "version": "4.0.3", "resolved": "https://registry.npmmirror.com/chokidar/-/chokidar-4.0.3.tgz", @@ -1178,6 +1240,18 @@ "node": ">=18" } }, + "node_modules/core-js": { + "version": "3.46.0", + "resolved": "https://registry.npmmirror.com/core-js/-/core-js-3.46.0.tgz", + "integrity": "sha512-vDMm9B0xnqqZ8uSBpZ8sNtRtOdmfShrvT6h2TuQGLs0Is+cR0DYbj/KWP6ALVNbWPpqA/qPLoOuppJN07humpA==", + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/core-js" + } + }, "node_modules/create-require": { "version": "1.1.1", "resolved": "https://registry.npmmirror.com/create-require/-/create-require-1.1.1.tgz", @@ -1185,6 +1259,15 @@ "dev": true, "license": 
"MIT" }, + "node_modules/css-line-break": { + "version": "2.1.0", + "resolved": "https://registry.npmmirror.com/css-line-break/-/css-line-break-2.1.0.tgz", + "integrity": "sha512-FHcKFCZcAha3LwfVBhCQbW2nCNbkZXn7KVUJcsT5/P8YmfsVja0FMPJr0B903j/E69HUphKiV9iQArX8SDYA4w==", + "license": "MIT", + "dependencies": { + "utrie": "^1.0.2" + } + }, "node_modules/dateformat": { "version": "4.6.3", "resolved": "https://registry.npmmirror.com/dateformat/-/dateformat-4.6.3.tgz", @@ -1262,6 +1345,16 @@ "node": ">=0.3.1" } }, + "node_modules/dompurify": { + "version": "3.3.0", + "resolved": "https://registry.npmmirror.com/dompurify/-/dompurify-3.3.0.tgz", + "integrity": "sha512-r+f6MYR1gGN1eJv0TVQbhA7if/U7P87cdPl3HN5rikqaBSBxLiCb/b9O+2eG0cxz0ghyU+mU1QkbsOwERMYlWQ==", + "license": "(MPL-2.0 OR Apache-2.0)", + "optional": true, + "optionalDependencies": { + "@types/trusted-types": "^2.0.7" + } + }, "node_modules/dotenv": { "version": "17.2.3", "resolved": "https://registry.npmmirror.com/dotenv/-/dotenv-17.2.3.tgz", @@ -1413,6 +1506,12 @@ "@esbuild/win32-x64": "0.25.10" } }, + "node_modules/eventemitter3": { + "version": "5.0.1", + "resolved": "https://registry.npmmirror.com/eventemitter3/-/eventemitter3-5.0.1.tgz", + "integrity": "sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA==", + "license": "MIT" + }, "node_modules/exsolve": { "version": "1.0.7", "resolved": "https://registry.npmmirror.com/exsolve/-/exsolve-1.0.7.tgz", @@ -1499,6 +1598,17 @@ "node": ">=20" } }, + "node_modules/fast-png": { + "version": "6.4.0", + "resolved": "https://registry.npmmirror.com/fast-png/-/fast-png-6.4.0.tgz", + "integrity": "sha512-kAqZq1TlgBjZcLr5mcN6NP5Rv4V2f22z00c3g8vRrwkcqjerx7BEhPbOnWCPqaHUl2XWQBJQvOT/FQhdMT7X/Q==", + "license": "MIT", + "dependencies": { + "@types/pako": "^2.0.3", + "iobuffer": "^5.3.2", + "pako": "^2.1.0" + } + }, "node_modules/fast-querystring": { "version": "1.1.2", "resolved": 
"https://registry.npmmirror.com/fast-querystring/-/fast-querystring-1.1.2.tgz", @@ -1621,6 +1731,12 @@ "xtend": "^4.0.0" } }, + "node_modules/fflate": { + "version": "0.8.2", + "resolved": "https://registry.npmmirror.com/fflate/-/fflate-0.8.2.tgz", + "integrity": "sha512-cPJU47OaAoCbg0pBvzsgpTPhmhqI5eJjh/JIu8tPj5q+T7iLvW/JAYUqmE7KOB4R1ZyEhzBaIQpQpardBF5z8A==", + "license": "MIT" + }, "node_modules/fill-range": { "version": "7.1.1", "resolved": "https://registry.npmmirror.com/fill-range/-/fill-range-7.1.1.tgz", @@ -1856,6 +1972,19 @@ "dev": true, "license": "MIT" }, + "node_modules/html2canvas": { + "version": "1.4.1", + "resolved": "https://registry.npmmirror.com/html2canvas/-/html2canvas-1.4.1.tgz", + "integrity": "sha512-fPU6BHNpsyIhr8yyMpTLLxAbkaK8ArIBcmZIRiBLiDhjeqvXolaEmDGmELFuX9I4xDcaKKcJl+TKZLqruBbmWA==", + "license": "MIT", + "dependencies": { + "css-line-break": "^2.1.0", + "text-segmentation": "^1.0.3" + }, + "engines": { + "node": ">=8.0.0" + } + }, "node_modules/ignore-by-default": { "version": "1.0.1", "resolved": "https://registry.npmmirror.com/ignore-by-default/-/ignore-by-default-1.0.1.tgz", @@ -1869,6 +1998,12 @@ "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", "license": "ISC" }, + "node_modules/iobuffer": { + "version": "5.4.0", + "resolved": "https://registry.npmmirror.com/iobuffer/-/iobuffer-5.4.0.tgz", + "integrity": "sha512-DRebOWuqDvxunfkNJAlc3IzWIPD5xVxwUNbHr7xKB8E6aLJxIPfNX3CoMJghcFjpv6RWQsrcJbghtEwSPoJqMA==", + "license": "MIT" + }, "node_modules/ipaddr.js": { "version": "2.2.0", "resolved": "https://registry.npmmirror.com/ipaddr.js/-/ipaddr.js-2.2.0.tgz", @@ -1980,6 +2115,23 @@ "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", "license": "MIT" }, + "node_modules/jspdf": { + "version": "3.0.3", + "resolved": "https://registry.npmmirror.com/jspdf/-/jspdf-3.0.3.tgz", + "integrity": 
"sha512-eURjAyz5iX1H8BOYAfzvdPfIKK53V7mCpBTe7Kb16PaM8JSXEcUQNBQaiWMI8wY5RvNOPj4GccMjTlfwRBd+oQ==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.26.9", + "fast-png": "^6.2.0", + "fflate": "^0.8.1" + }, + "optionalDependencies": { + "canvg": "^3.0.11", + "core-js": "^3.6.0", + "dompurify": "^3.2.4", + "html2canvas": "^1.0.0-rc.5" + } + }, "node_modules/light-my-request": { "version": "6.6.0", "resolved": "https://registry.npmmirror.com/light-my-request/-/light-my-request-6.6.0.tgz", @@ -2232,6 +2384,40 @@ "wrappy": "1" } }, + "node_modules/p-queue": { + "version": "9.0.0", + "resolved": "https://registry.npmmirror.com/p-queue/-/p-queue-9.0.0.tgz", + "integrity": "sha512-KO1RyxstL9g1mK76530TExamZC/S2Glm080Nx8PE5sTd7nlduDQsAfEl4uXX+qZjLiwvDauvzXavufy3+rJ9zQ==", + "license": "MIT", + "dependencies": { + "eventemitter3": "^5.0.1", + "p-timeout": "^7.0.0" + }, + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-timeout": { + "version": "7.0.1", + "resolved": "https://registry.npmmirror.com/p-timeout/-/p-timeout-7.0.1.tgz", + "integrity": "sha512-AxTM2wDGORHGEkPCt8yqxOTMgpfbEHqF51f/5fJCmwFC3C/zNcGT63SymH2ttOAaiIws2zVg4+izQCjrakcwHg==", + "license": "MIT", + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/pako": { + "version": "2.1.0", + "resolved": "https://registry.npmmirror.com/pako/-/pako-2.1.0.tgz", + "integrity": "sha512-w+eufiZ1WuJYgPXbV/PO3NCMEc3xqylkKHzp8bxp1uW4qaSNQUkwmLLEc3kKsfz8lpV1F8Ht3U1Cm+9Srog2ug==", + "license": "(MIT AND Zlib)" + }, "node_modules/pathe": { "version": "2.0.3", "resolved": "https://registry.npmmirror.com/pathe/-/pathe-2.0.3.tgz", @@ -2244,6 +2430,13 @@ "integrity": "sha512-xCy9V055GLEqoFaHoC1SoLIaLmWctgCUaBaWxDZ7/Zx4CTyX7cJQLJOok/orfjZAh9kEYpjJa4d0KcJmCbctZA==", "license": "MIT" }, + "node_modules/performance-now": { + "version": "2.1.0", + "resolved": 
"https://registry.npmmirror.com/performance-now/-/performance-now-2.1.0.tgz", + "integrity": "sha512-7EAHlyLHI56VEIdK57uwHdHKIaAGbnXPiw0yWbarQZOKaKpvUIgW0jWRVLiatnM+XXlSwsanIBH/hzGMJulMow==", + "license": "MIT", + "optional": true + }, "node_modules/picomatch": { "version": "2.3.1", "resolved": "https://registry.npmmirror.com/picomatch/-/picomatch-2.3.1.tgz", @@ -2417,6 +2610,16 @@ "integrity": "sha512-tYC1Q1hgyRuHgloV/YXs2w15unPVh8qfu/qCTfhTYamaw7fyhumKa2yGpdSo87vY32rIclj+4fWYQXUMs9EHvg==", "license": "MIT" }, + "node_modules/raf": { + "version": "3.4.1", + "resolved": "https://registry.npmmirror.com/raf/-/raf-3.4.1.tgz", + "integrity": "sha512-Sq4CW4QhwOHE8ucn6J34MqtZCeWFP2aQSmrlroYgqAV1PjStIhJXxYuTgUIfkEk7zTLjmIjLmU5q+fbD1NnOJA==", + "license": "MIT", + "optional": true, + "dependencies": { + "performance-now": "^2.1.0" + } + }, "node_modules/rc9": { "version": "2.1.2", "resolved": "https://registry.npmmirror.com/rc9/-/rc9-2.1.2.tgz", @@ -2449,6 +2652,13 @@ "node": ">= 12.13.0" } }, + "node_modules/regenerator-runtime": { + "version": "0.13.11", + "resolved": "https://registry.npmmirror.com/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz", + "integrity": "sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==", + "license": "MIT", + "optional": true + }, "node_modules/require-from-string": { "version": "2.0.2", "resolved": "https://registry.npmmirror.com/require-from-string/-/require-from-string-2.0.2.tgz", @@ -2493,6 +2703,16 @@ "integrity": "sha512-q1b3N5QkRUWUl7iyylaaj3kOpIT0N2i9MqIEQXP73GVsN9cw3fdx8X63cEmWhJGi2PPCF23Ijp7ktmd39rawIA==", "license": "MIT" }, + "node_modules/rgbcolor": { + "version": "1.0.1", + "resolved": "https://registry.npmmirror.com/rgbcolor/-/rgbcolor-1.0.1.tgz", + "integrity": "sha512-9aZLIrhRaD97sgVhtJOW6ckOEh6/GnvQtdVNfdZ6s67+3/XwLS9lBcQYzEEhYVeUowN7pRzMLsyGhK2i/xvWbw==", + "license": "MIT OR SEE LICENSE IN FEEL-FREE.md", + "optional": true, + "engines": { + "node": ">= 0.8.15" + } + }, 
"node_modules/safe-buffer": { "version": "5.2.1", "resolved": "https://registry.npmmirror.com/safe-buffer/-/safe-buffer-5.2.1.tgz", @@ -2618,6 +2838,16 @@ "node": ">= 10.x" } }, + "node_modules/stackblur-canvas": { + "version": "2.7.0", + "resolved": "https://registry.npmmirror.com/stackblur-canvas/-/stackblur-canvas-2.7.0.tgz", + "integrity": "sha512-yf7OENo23AGJhBriGx0QivY5JP6Y1HbrrDI6WLt6C5auYZXlQrheoY8hD4ibekFKz1HOfE48Ww8kMWMnJD/zcQ==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">=0.1.14" + } + }, "node_modules/steed": { "version": "1.1.3", "resolved": "https://registry.npmmirror.com/steed/-/steed-1.1.3.tgz", @@ -2657,6 +2887,25 @@ "node": ">=4" } }, + "node_modules/svg-pathdata": { + "version": "6.0.3", + "resolved": "https://registry.npmmirror.com/svg-pathdata/-/svg-pathdata-6.0.3.tgz", + "integrity": "sha512-qsjeeq5YjBZ5eMdFuUa4ZosMLxgr5RZ+F+Y1OrDhuOCEInRMA3x74XdBtggJcj9kOeInz0WE+LgCPDkZFlBYJw==", + "license": "MIT", + "optional": true, + "engines": { + "node": ">=12.0.0" + } + }, + "node_modules/text-segmentation": { + "version": "1.0.3", + "resolved": "https://registry.npmmirror.com/text-segmentation/-/text-segmentation-1.0.3.tgz", + "integrity": "sha512-iOiPUo/BGnZ6+54OsWxZidGCsdU8YbE4PSpdPinp7DeMtUJNJBoJ/ouUSTJjHkh1KntHaltHl/gDs2FC4i5+Nw==", + "license": "MIT", + "dependencies": { + "utrie": "^1.0.2" + } + }, "node_modules/thread-stream": { "version": "3.1.0", "resolved": "https://registry.npmmirror.com/thread-stream/-/thread-stream-3.1.0.tgz", @@ -2666,6 +2915,12 @@ "real-require": "^0.2.0" } }, + "node_modules/tiktoken": { + "version": "1.0.22", + "resolved": "https://registry.npmmirror.com/tiktoken/-/tiktoken-1.0.22.tgz", + "integrity": "sha512-PKvy1rVF1RibfF3JlXBSP0Jrcw2uq3yXdgcEXtKTYn3QJ/cBRBHDnrJ5jHky+MENZ6DIPwNUGWpkVx+7joCpNA==", + "license": "MIT" + }, "node_modules/tinyexec": { "version": "1.0.1", "resolved": "https://registry.npmmirror.com/tinyexec/-/tinyexec-1.0.1.tgz", @@ -2795,6 +3050,15 @@ "integrity": 
"sha512-QQiYxHuyZ9gQUIrmPo3IA+hUl4KYk8uSA7cHrcKd/l3p1OTpZcM0Tbp9x7FAtXdAYhlasd60ncPpgu6ihG6TOA==", "license": "MIT" }, + "node_modules/utrie": { + "version": "1.0.2", + "resolved": "https://registry.npmmirror.com/utrie/-/utrie-1.0.2.tgz", + "integrity": "sha512-1MLa5ouZiOmQzUbjbu9VmjLzn1QLXBhwpUa7kdLUQK+KQ5KA9I1vk5U4YHe/X2Ch7PYnJfWuWT+VbuxbGwljhw==", + "license": "MIT", + "dependencies": { + "base64-arraybuffer": "^1.0.2" + } + }, "node_modules/v8-compile-cache-lib": { "version": "3.0.1", "resolved": "https://registry.npmmirror.com/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", diff --git a/backend/package.json b/backend/package.json index 9e0bd6ff..6af65604 100644 --- a/backend/package.json +++ b/backend/package.json @@ -11,8 +11,12 @@ "prisma:generate": "prisma generate", "prisma:migrate": "prisma migrate dev", "prisma:studio": "prisma studio", + "prisma:seed": "tsx prisma/seed.ts", "test": "echo \"Error: no test specified\" && exit 1" }, + "prisma": { + "seed": "tsx prisma/seed.ts" + }, "keywords": [ "ai", "clinical", @@ -31,8 +35,12 @@ "dotenv": "^17.2.3", "fastify": "^5.6.1", "form-data": "^4.0.4", + "html2canvas": "^1.4.1", "js-yaml": "^4.1.0", + "jspdf": "^3.0.3", + "p-queue": "^9.0.0", "prisma": "^6.17.0", + "tiktoken": "^1.0.22", "zod": "^4.1.12" }, "devDependencies": { diff --git a/backend/prisma/schema.prisma b/backend/prisma/schema.prisma index edaa3568..6bbf928e 100644 --- a/backend/prisma/schema.prisma +++ b/backend/prisma/schema.prisma @@ -8,6 +8,7 @@ generator client { datasource db { provider = "postgresql" url = env("DATABASE_URL") + schemas = ["platform_schema", "aia_schema", "pkb_schema", "asl_schema", "common_schema", "dc_schema", "rvw_schema", "admin_schema", "ssa_schema", "st_schema", "public"] } // ==================== 用户模块 ==================== @@ -38,11 +39,15 @@ model User { documents Document[] adminLogs AdminLog[] generalConversations GeneralConversation[] + batchTasks BatchTask[] // Phase 3: 批处理任务 + taskTemplates TaskTemplate[] 
// Phase 3: 任务模板 + reviewTasks ReviewTask[] // 稿件审查任务 @@index([email]) @@index([status]) @@index([createdAt]) @@map("users") + @@schema("platform_schema") } // ==================== 项目模块 ==================== @@ -66,6 +71,7 @@ model Project { @@index([createdAt]) @@index([deletedAt]) @@map("projects") + @@schema("aia_schema") } // ==================== 对话模块 ==================== @@ -95,6 +101,7 @@ model Conversation { @@index([createdAt]) @@index([deletedAt]) @@map("conversations") + @@schema("aia_schema") } model Message { @@ -115,6 +122,7 @@ model Message { @@index([createdAt]) @@index([isPinned]) @@map("messages") + @@schema("aia_schema") } // ==================== 知识库模块 ==================== @@ -133,10 +141,12 @@ model KnowledgeBase { user User @relation(fields: [userId], references: [id], onDelete: Cascade) documents Document[] + batchTasks BatchTask[] // Phase 3: 批处理任务 @@index([userId]) @@index([difyDatasetId]) @@map("knowledge_bases") + @@schema("pkb_schema") } model Document { @@ -154,17 +164,122 @@ model Document { segmentsCount Int? @map("segments_count") tokensCount Int? @map("tokens_count") + // Phase 2: 全文阅读模式新增字段 + extractionMethod String? @map("extraction_method") // pymupdf/nougat/mammoth/direct + extractionQuality Float? @map("extraction_quality") // 0-1质量分数 + charCount Int? @map("char_count") // 字符数 + language String? // 检测到的语言 (chinese/english) + extractedText String? @map("extracted_text") @db.Text // 提取的文本内容 + uploadedAt DateTime @default(now()) @map("uploaded_at") processedAt DateTime? 
@map("processed_at") knowledgeBase KnowledgeBase @relation(fields: [kbId], references: [id], onDelete: Cascade) user User @relation(fields: [userId], references: [id], onDelete: Cascade) + batchResults BatchResult[] // Phase 3: 批处理结果 @@index([kbId]) @@index([userId]) @@index([status]) @@index([difyDocumentId]) + @@index([extractionMethod]) @@map("documents") + @@schema("pkb_schema") +} + +// ==================== Phase 3: 批处理模块 ==================== + +// 批处理任务 +model BatchTask { + id String @id @default(uuid()) + userId String @map("user_id") + kbId String @map("kb_id") + + // 任务基本信息 + name String // 任务名称(用户可自定义) + templateType String @map("template_type") // 'preset' | 'custom' + templateId String? @map("template_id") // 预设模板ID(如'clinical_research') + prompt String @db.Text // 提示词(完整的) + + // 执行状态 + status String // 'processing' | 'completed' | 'failed' | 'paused' + totalDocuments Int @map("total_documents") + completedCount Int @default(0) @map("completed_count") + failedCount Int @default(0) @map("failed_count") + + // 配置 + modelType String @map("model_type") // 使用的模型 + concurrency Int @default(3) // 固定为3 + + // 时间统计 + startedAt DateTime? @map("started_at") + completedAt DateTime? @map("completed_at") + durationSeconds Int? @map("duration_seconds") // 执行时长(秒) + + // 关联 + results BatchResult[] + user User @relation(fields: [userId], references: [id], onDelete: Cascade) + knowledgeBase KnowledgeBase @relation(fields: [kbId], references: [id], onDelete: Cascade) + + createdAt DateTime @default(now()) @map("created_at") + updatedAt DateTime @updatedAt @map("updated_at") + + @@index([userId]) + @@index([kbId]) + @@index([status]) + @@index([createdAt]) + @@map("batch_tasks") + @@schema("pkb_schema") +} + +// 批处理结果(每篇文献一条) +model BatchResult { + id String @id @default(uuid()) + taskId String @map("task_id") + documentId String @map("document_id") + + // 执行结果 + status String // 'success' | 'failed' + data Json? // 提取的结构化数据(预设模板)或文本(自定义) + rawOutput String? 
@db.Text @map("raw_output") // AI原始输出(备份) + errorMessage String? @db.Text @map("error_message") // 错误信息 + + // 性能指标 + processingTimeMs Int? @map("processing_time_ms") // 处理时长(毫秒) + tokensUsed Int? @map("tokens_used") // Token使用量 + + // 关联 + task BatchTask @relation(fields: [taskId], references: [id], onDelete: Cascade) + document Document @relation(fields: [documentId], references: [id], onDelete: Cascade) + + createdAt DateTime @default(now()) @map("created_at") + + @@index([taskId]) + @@index([documentId]) + @@index([status]) + @@map("batch_results") + @@schema("pkb_schema") +} + +// 任务模板(暂不实现,预留) +model TaskTemplate { + id String @id @default(uuid()) + userId String @map("user_id") + + name String + description String? + prompt String @db.Text + outputFields Json // 期望的输出字段定义 + isPublic Boolean @default(false) @map("is_public") + + user User @relation(fields: [userId], references: [id], onDelete: Cascade) + + createdAt DateTime @default(now()) @map("created_at") + updatedAt DateTime @updatedAt @map("updated_at") + + @@index([userId]) + @@map("task_templates") + @@schema("pkb_schema") } // ==================== 运营管理模块 ==================== @@ -187,6 +302,7 @@ model AdminLog { @@index([createdAt]) @@index([action]) @@map("admin_logs") + @@schema("public") } // ==================== 通用对话模块 ==================== @@ -208,6 +324,7 @@ model GeneralConversation { @@index([createdAt]) @@index([updatedAt]) @@map("general_conversations") + @@schema("aia_schema") } model GeneralMessage { @@ -226,4 +343,51 @@ model GeneralMessage { @@index([conversationId]) @@index([createdAt]) @@map("general_messages") + @@schema("aia_schema") +} + +// ==================== 稿件审查模块 ==================== + +// 稿件审查任务 +model ReviewTask { + id String @id @default(uuid()) + userId String @map("user_id") + + // 文件信息 + fileName String @map("file_name") + fileSize Int @map("file_size") + filePath String? @map("file_path") + + // 文档内容 + extractedText String @map("extracted_text") @db.Text + wordCount Int? 
@map("word_count") + + // 执行状态 + status String @default("pending") + // pending, extracting, reviewing_editorial, reviewing_methodology, completed, failed + + // 评估结果(JSON) + editorialReview Json? @map("editorial_review") + methodologyReview Json? @map("methodology_review") + overallScore Float? @map("overall_score") + + // 执行信息 + modelUsed String? @map("model_used") + startedAt DateTime? @map("started_at") + completedAt DateTime? @map("completed_at") + durationSeconds Int? @map("duration_seconds") + errorMessage String? @map("error_message") @db.Text + + // 元数据 + createdAt DateTime @default(now()) @map("created_at") + updatedAt DateTime @updatedAt @map("updated_at") + + // 关联 + user User @relation(fields: [userId], references: [id], onDelete: Cascade) + + @@index([userId]) + @@index([status]) + @@index([createdAt]) + @@map("review_tasks") + @@schema("public") } diff --git a/backend/src/common/document/ExtractionClient.ts b/backend/src/common/document/ExtractionClient.ts new file mode 100644 index 00000000..df55ddd6 --- /dev/null +++ b/backend/src/common/document/ExtractionClient.ts @@ -0,0 +1,272 @@ +import FormData from 'form-data'; +import axios from 'axios'; + +/** + * Extraction Service Client + * 调用Python微服务进行文档提取 + */ + +const EXTRACTION_SERVICE_URL = process.env.EXTRACTION_SERVICE_URL || 'http://localhost:8000'; + +export interface ExtractionResult { + success: boolean; + method: string; // pymupdf/nougat/mammoth/direct + text: string; + quality?: number; + encoding?: string; + language?: string; + metadata: { + filename: string; + char_count?: number; + line_count?: number; + file_size?: number; + page_count?: number; + has_tables?: boolean; + [key: string]: any; + }; + error?: string; +} + +class ExtractionClient { + private baseUrl: string; + + constructor(baseUrl: string = EXTRACTION_SERVICE_URL) { + this.baseUrl = baseUrl; + } + + /** + * 健康检查 + */ + async health(): Promise<{ + status: string; + checks: any; + timestamp: string; + }> { + try { + const 
response = await axios.get(`${this.baseUrl}/api/health`); + return response.data; + } catch (error) { + console.error('[ExtractionClient] Health check failed:', error); + throw new Error('Extraction service is unavailable'); + } + } + + /** + * 通用文档提取接口 + * 自动检测文件类型并调用相应的提取方法 + */ + async extractDocument( + file: Buffer, + filename: string + ): Promise { + try { + const formData = new FormData(); + formData.append('file', file, filename); + + const response = await axios.post( + `${this.baseUrl}/api/extract`, + formData, + { + headers: { + ...formData.getHeaders(), + }, + timeout: 120000, // 2分钟超时 + } + ); + + return response.data; + } catch (error) { + console.error('[ExtractionClient] Extract failed:', error); + + if (axios.isAxiosError(error) && error.response) { + throw new Error(`Extraction failed: ${error.response.data.detail || error.message}`); + } + + throw new Error('Document extraction failed'); + } + } + + /** + * PDF专用提取接口 + */ + async extractPdf( + file: Buffer, + filename: string, + method?: 'auto' | 'nougat' | 'pymupdf' + ): Promise { + try { + const formData = new FormData(); + formData.append('file', file, filename); + + if (method) { + formData.append('method', method); + } + + const response = await axios.post( + `${this.baseUrl}/api/extract/pdf`, + formData, + { + headers: { + ...formData.getHeaders(), + }, + timeout: 180000, // 3分钟超时(Nougat较慢) + } + ); + + return response.data; + } catch (error) { + console.error('[ExtractionClient] PDF extract failed:', error); + + if (axios.isAxiosError(error) && error.response) { + throw new Error(`PDF extraction failed: ${error.response.data.detail || error.message}`); + } + + throw new Error('PDF extraction failed'); + } + } + + /** + * Docx专用提取接口 + */ + async extractDocx( + file: Buffer, + filename: string + ): Promise { + try { + const formData = new FormData(); + formData.append('file', file, filename); + + const response = await axios.post( + `${this.baseUrl}/api/extract/docx`, + formData, + { + 
headers: { + ...formData.getHeaders(), + }, + timeout: 60000, // 1分钟超时 + } + ); + + return response.data; + } catch (error) { + console.error('[ExtractionClient] Docx extract failed:', error); + + if (axios.isAxiosError(error) && error.response) { + throw new Error(`Docx extraction failed: ${error.response.data.detail || error.message}`); + } + + throw new Error('Docx extraction failed'); + } + } + + /** + * Txt专用提取接口 + */ + async extractTxt( + file: Buffer, + filename: string + ): Promise { + try { + const formData = new FormData(); + formData.append('file', file, filename); + + const response = await axios.post( + `${this.baseUrl}/api/extract/txt`, + formData, + { + headers: { + ...formData.getHeaders(), + }, + timeout: 30000, // 30秒超时 + } + ); + + return response.data; + } catch (error) { + console.error('[ExtractionClient] Txt extract failed:', error); + + if (axios.isAxiosError(error) && error.response) { + throw new Error(`Txt extraction failed: ${error.response.data.detail || error.message}`); + } + + throw new Error('Txt extraction failed'); + } + } + + /** + * 检测PDF语言 + */ + async detectLanguage( + file: Buffer, + filename: string + ): Promise<{ + language: string; + chinese_ratio: number; + chinese_chars: number; + total_chars: number; + }> { + try { + const formData = new FormData(); + formData.append('file', file, filename); + + const response = await axios.post( + `${this.baseUrl}/api/detect-language`, + formData, + { + headers: { + ...formData.getHeaders(), + }, + timeout: 30000, + } + ); + + return response.data; + } catch (error) { + console.error('[ExtractionClient] Language detection failed:', error); + throw new Error('Language detection failed'); + } + } + + /** + * 获取PDF处理策略 + */ + async getPdfStrategy( + file: Buffer, + filename: string + ): Promise<{ + detected_language: string; + recommended_method: string; + reason: string; + nougat_available: boolean; + }> { + try { + const formData = new FormData(); + formData.append('file', file, 
filename); + + const response = await axios.post( + `${this.baseUrl}/api/pdf-strategy`, + formData, + { + headers: { + ...formData.getHeaders(), + }, + timeout: 30000, + } + ); + + return response.data; + } catch (error) { + console.error('[ExtractionClient] Get PDF strategy failed:', error); + throw new Error('Get PDF strategy failed'); + } + } +} + +// 导出单例 +export const extractionClient = new ExtractionClient(); + + + + + + diff --git a/backend/src/adapters/DeepSeekAdapter.ts b/backend/src/common/llm/adapters/DeepSeekAdapter.ts similarity index 96% rename from backend/src/adapters/DeepSeekAdapter.ts rename to backend/src/common/llm/adapters/DeepSeekAdapter.ts index 240a0677..017ca3ac 100644 --- a/backend/src/adapters/DeepSeekAdapter.ts +++ b/backend/src/common/llm/adapters/DeepSeekAdapter.ts @@ -1,6 +1,6 @@ import axios from 'axios'; import { ILLMAdapter, Message, LLMOptions, LLMResponse, StreamChunk } from './types.js'; -import { config } from '../config/env.js'; +import { config } from '../../../config/env.js'; export class DeepSeekAdapter implements ILLMAdapter { modelName: string; @@ -35,7 +35,7 @@ export class DeepSeekAdapter implements ILLMAdapter { 'Content-Type': 'application/json', Authorization: `Bearer ${this.apiKey}`, }, - timeout: 60000, // 60秒超时 + timeout: 180000, // 180秒超时(3分钟)- 稿件评估需要更长时间 } ); diff --git a/backend/src/adapters/LLMFactory.ts b/backend/src/common/llm/adapters/LLMFactory.ts similarity index 83% rename from backend/src/adapters/LLMFactory.ts rename to backend/src/common/llm/adapters/LLMFactory.ts index 57ef6450..c151754c 100644 --- a/backend/src/adapters/LLMFactory.ts +++ b/backend/src/common/llm/adapters/LLMFactory.ts @@ -29,7 +29,11 @@ export class LLMFactory { break; case 'qwen3-72b': - adapter = new QwenAdapter('qwen-max'); // Qwen3-72B对应的模型名 + adapter = new QwenAdapter('qwen-plus'); // Qwen3-72B对应的模型名 + break; + + case 'qwen-long': + adapter = new QwenAdapter('qwen-long'); // 1M上下文超长文本模型 break; case 'gemini-pro': @@ -63,7 +67,7 
@@ export class LLMFactory { * @returns 是否支持 */ static isSupported(modelType: string): boolean { - return ['deepseek-v3', 'qwen3-72b', 'gemini-pro'].includes(modelType); + return ['deepseek-v3', 'qwen3-72b', 'qwen-long', 'gemini-pro'].includes(modelType); } /** @@ -71,7 +75,7 @@ export class LLMFactory { * @returns 支持的模型列表 */ static getSupportedModels(): ModelType[] { - return ['deepseek-v3', 'qwen3-72b', 'gemini-pro']; + return ['deepseek-v3', 'qwen3-72b', 'qwen-long', 'gemini-pro']; } } diff --git a/backend/src/adapters/QwenAdapter.ts b/backend/src/common/llm/adapters/QwenAdapter.ts similarity index 89% rename from backend/src/adapters/QwenAdapter.ts rename to backend/src/common/llm/adapters/QwenAdapter.ts index 31279828..d9c4a5c2 100644 --- a/backend/src/adapters/QwenAdapter.ts +++ b/backend/src/common/llm/adapters/QwenAdapter.ts @@ -1,6 +1,6 @@ import axios from 'axios'; import { ILLMAdapter, Message, LLMOptions, LLMResponse, StreamChunk } from './types.js'; -import { config } from '../config/env.js'; +import { config } from '../../../config/env.js'; export class QwenAdapter implements ILLMAdapter { modelName: string; @@ -39,7 +39,7 @@ export class QwenAdapter implements ILLMAdapter { 'Content-Type': 'application/json', Authorization: `Bearer ${this.apiKey}`, }, - timeout: 60000, + timeout: 180000, // 180秒超时(3分钟)- 稿件评估需要更长时间 } ); @@ -74,6 +74,15 @@ export class QwenAdapter implements ILLMAdapter { onChunk?: (chunk: StreamChunk) => void ): AsyncGenerator { try { + // Qwen-Long需要更长的超时时间(全文模式可能传输~750K tokens) + const timeout = this.modelName === 'qwen-long' ? 
300000 : 60000; // 5分钟 vs 1分钟 + + console.log(`[QwenAdapter] 开始流式调用`, { + model: this.modelName, + timeout: `${timeout / 1000}秒`, + messagesCount: messages.length, + }); + const response = await axios.post( this.baseURL, { @@ -96,7 +105,7 @@ export class QwenAdapter implements ILLMAdapter { 'X-DashScope-SSE': 'enable', }, responseType: 'stream', - timeout: 60000, + timeout: timeout, } ); diff --git a/backend/src/adapters/types.ts b/backend/src/common/llm/adapters/types.ts similarity index 92% rename from backend/src/adapters/types.ts rename to backend/src/common/llm/adapters/types.ts index 2c931684..60f37603 100644 --- a/backend/src/adapters/types.ts +++ b/backend/src/common/llm/adapters/types.ts @@ -51,7 +51,7 @@ export interface ILLMAdapter { } // 支持的模型类型 -export type ModelType = 'deepseek-v3' | 'qwen3-72b' | 'gemini-pro'; +export type ModelType = 'deepseek-v3' | 'qwen3-72b' | 'qwen-long' | 'gemini-pro'; diff --git a/backend/src/middleware/validateProject.ts b/backend/src/common/middleware/validateProject.ts similarity index 100% rename from backend/src/middleware/validateProject.ts rename to backend/src/common/middleware/validateProject.ts diff --git a/backend/src/clients/DifyClient.ts b/backend/src/common/rag/DifyClient.ts similarity index 98% rename from backend/src/clients/DifyClient.ts rename to backend/src/common/rag/DifyClient.ts index 0994504a..8a4b9953 100644 --- a/backend/src/clients/DifyClient.ts +++ b/backend/src/common/rag/DifyClient.ts @@ -14,7 +14,7 @@ import { DifyError, DifyErrorResponse, } from './types.js'; -import { config } from '../config/env.js'; +import { config } from '../../config/env.js'; /** * Dify API 客户端 @@ -144,7 +144,7 @@ export class DifyClient { ], segmentation: { separator: '\n', - max_tokens: 500, + max_tokens: 1500, // Phase 1优化:从500增加到1500 tokens }, }, }, diff --git a/backend/src/clients/types.ts b/backend/src/common/rag/types.ts similarity index 100% rename from backend/src/clients/types.ts rename to 
backend/src/common/rag/types.ts diff --git a/backend/src/common/utils/jsonParser.ts b/backend/src/common/utils/jsonParser.ts new file mode 100644 index 00000000..f5d557aa --- /dev/null +++ b/backend/src/common/utils/jsonParser.ts @@ -0,0 +1,152 @@ +/** + * Phase 3: 批处理模式 - JSON解析工具 + * + * AI的输出可能包含额外的文字说明,需要提取JSON块并解析 + */ + +export interface ParseResult { + success: boolean; + data?: T; + error?: string; + rawOutput: string; +} + +/** + * 从AI输出中提取JSON块 + * + * 支持的格式: + * 1. 纯JSON:{ "key": "value" } + * 2. 带前言:这是提取结果:\n{ "key": "value" } + * 3. 带后缀:{ "key": "value" }\n\n以上是提取结果 + * 4. 代码块:```json\n{ "key": "value" }\n``` + */ +export function extractJSON(text: string): string | null { + // 尝试1:直接查找 {...} 或 [...] + const jsonPattern = /(\{[\s\S]*\}|\[[\s\S]*\])/; + const match = text.match(jsonPattern); + + if (match) { + return match[1]; + } + + // 尝试2:查找代码块中的JSON + const codeBlockPattern = /```(?:json)?\s*\n?([\s\S]*?)\n?```/; + const codeMatch = text.match(codeBlockPattern); + + if (codeMatch) { + return codeMatch[1].trim(); + } + + return null; +} + +/** + * 解析JSON字符串 + * + * @param jsonString JSON字符串 + * @param expectedFields 期望的字段列表(可选,用于验证) + * @returns 解析结果 + */ +export function parseJSON( + jsonString: string, + expectedFields?: string[] +): ParseResult { + const rawOutput = jsonString; + + try { + // 提取JSON块 + const extracted = extractJSON(jsonString); + + if (!extracted) { + return { + success: false, + error: '未找到JSON格式的数据', + rawOutput, + }; + } + + // 解析JSON + const data = JSON.parse(extracted) as T; + + // 验证字段(如果提供了expectedFields) + if (expectedFields && Array.isArray(expectedFields)) { + const missingFields: string[] = []; + + for (const field of expectedFields) { + if (!(field in (data as any))) { + missingFields.push(field); + } + } + + if (missingFields.length > 0) { + console.warn(`[JsonParser] 缺少字段: ${missingFields.join(', ')}`); + // 为缺失字段填充默认值 + for (const field of missingFields) { + (data as any)[field] = '未提取到'; + } + } + } + + return { + 
success: true, + data, + rawOutput, + }; + + } catch (error: any) { + return { + success: false, + error: error.message, + rawOutput, + }; + } +} + +/** + * 验证JSON数据是否符合模板要求 + * + * @param data 解析后的数据 + * @param templateFields 模板字段定义 + * @returns 是否有效 + */ +export function validateTemplateData( + data: any, + templateFields: Array<{ key: string; type: string }> +): { valid: boolean; errors: string[] } { + const errors: string[] = []; + + if (!data || typeof data !== 'object') { + errors.push('数据不是有效的对象'); + return { valid: false, errors }; + } + + for (const field of templateFields) { + const value = data[field.key]; + + // 检查字段是否存在 + if (value === undefined || value === null || value === '') { + console.warn(`[JsonParser] 字段 ${field.key} 为空`); + // 不算错误,只是警告 + } + + // 类型检查(宽松) + if (field.type === 'number' && typeof value !== 'number' && value !== '') { + // 尝试转换 + const num = Number(value); + if (!isNaN(num)) { + data[field.key] = num; + } + } + } + + return { valid: errors.length === 0, errors }; +} + + + + + + + + + diff --git a/backend/src/config/env.ts b/backend/src/config/env.ts index af55c4fa..fd257619 100644 --- a/backend/src/config/env.ts +++ b/backend/src/config/env.ts @@ -27,8 +27,15 @@ export const config = { // LLM API配置 deepseekApiKey: process.env.DEEPSEEK_API_KEY || '', + deepseekBaseUrl: process.env.DEEPSEEK_BASE_URL || 'https://api.deepseek.com', + dashscopeApiKey: process.env.DASHSCOPE_API_KEY || '', // 用于Qwen模型 geminiApiKey: process.env.GEMINI_API_KEY || '', + + // CloseAI配置(代理OpenAI和Claude) + closeaiApiKey: process.env.CLOSEAI_API_KEY || '', + closeaiOpenaiBaseUrl: process.env.CLOSEAI_OPENAI_BASE_URL || 'https://api.openai-proxy.org/v1', + closeaiClaudeBaseUrl: process.env.CLOSEAI_CLAUDE_BASE_URL || 'https://api.openai-proxy.org/anthropic', // Dify配置 difyApiKey: process.env.DIFY_API_KEY || '', diff --git a/backend/src/controllers/chatController.ts b/backend/src/controllers/chatController.ts deleted file mode 100644 index b09f759e..00000000 --- 
a/backend/src/controllers/chatController.ts +++ /dev/null @@ -1,304 +0,0 @@ -import { FastifyRequest, FastifyReply } from 'fastify'; -import { ModelType } from '../adapters/types.js'; -import { LLMFactory } from '../adapters/LLMFactory.js'; -import * as knowledgeBaseService from '../services/knowledgeBaseService.js'; -import { prisma } from '../config/database.js'; - -interface SendChatMessageBody { - content: string; - modelType: ModelType; - knowledgeBaseIds?: string[]; - conversationId?: string; // 可选:续接已有对话 -} - -/** - * 通用聊天Controller - * 无需项目和智能体,纯大模型对话 - */ -export class ChatController { - /** - * 发送消息(流式输出) - */ - async sendMessageStream( - request: FastifyRequest<{ Body: SendChatMessageBody }>, - reply: FastifyReply - ) { - try { - // TODO: 从JWT token获取userId - const userId = 'user-mock-001'; - - const { content, modelType, knowledgeBaseIds, conversationId } = request.body; - - console.log('💬 [ChatController] 收到通用对话请求', { - content, - modelType, - knowledgeBaseIds: knowledgeBaseIds || [], - conversationId, - }); - - // 验证modelType - if (modelType !== 'deepseek-v3' && modelType !== 'qwen3-72b' && modelType !== 'gemini-pro') { - reply.code(400).send({ - success: false, - message: `不支持的模型类型: ${modelType}`, - }); - return; - } - - // 获取或创建对话记录 - let conversation; - if (conversationId) { - // 验证对话是否存在且属于当前用户 - conversation = await prisma.generalConversation.findFirst({ - where: { - id: conversationId, - userId, - deletedAt: null, - }, - }); - - if (!conversation) { - reply.code(404).send({ - success: false, - message: '对话不存在', - }); - return; - } - } else { - // 创建新对话 - conversation = await prisma.generalConversation.create({ - data: { - userId, - title: content.substring(0, 50), // 用第一条消息的前50字作为标题 - modelName: modelType, - }, - }); - console.log('✅ [ChatController] 创建新对话', { conversationId: conversation.id }); - } - - // 检索知识库上下文 - let knowledgeBaseContext = ''; - if (knowledgeBaseIds && knowledgeBaseIds.length > 0) { - console.log('📚 [ChatController] 
开始检索知识库'); - const knowledgeResults: string[] = []; - - for (const kbId of knowledgeBaseIds) { - try { - const searchResult = await knowledgeBaseService.searchKnowledgeBase( - userId, - kbId, - content, - 3 - ); - - if (searchResult.records && searchResult.records.length > 0) { - const kbInfo = await prisma.knowledgeBase.findUnique({ - where: { id: kbId }, - select: { name: true }, - }); - - knowledgeResults.push( - `【知识库:${kbInfo?.name || '未命名'}】\n` + - searchResult.records - .map((record: any, index: number) => { - const score = (record.score * 100).toFixed(1); - return `${index + 1}. [相关度${score}%] ${record.segment.content}`; - }) - .join('\n\n') - ); - } - } catch (error) { - console.error(`❌ [ChatController] 检索知识库失败 ${kbId}:`, error); - } - } - - if (knowledgeResults.length > 0) { - knowledgeBaseContext = knowledgeResults.join('\n\n---\n\n'); - console.log(`💾 [ChatController] 知识库上下文: ${knowledgeBaseContext.length} 字符`); - } - } - - // 获取历史消息(最近20条) - const historyMessages = await prisma.generalMessage.findMany({ - where: { - conversationId: conversation.id, - }, - orderBy: { - createdAt: 'desc', - }, - take: 20, - }); - historyMessages.reverse(); - console.log(`📜 [ChatController] 历史消息数: ${historyMessages.length}`); - - // 组装消息上下文 - const messages: any[] = [ - { - role: 'system', - content: '你是一个专业、友好的AI助手。当用户提供参考资料时,请优先基于参考资料回答。', - }, - ]; - - // 添加历史消息 - for (const msg of historyMessages) { - messages.push({ - role: msg.role, - content: msg.content, - }); - } - - // 添加当前用户消息 - let userContent = content; - if (knowledgeBaseContext) { - userContent = `${content}\n\n## 参考资料(来自知识库)\n${knowledgeBaseContext}`; - } - messages.push({ - role: 'user', - content: userContent, - }); - - // 设置SSE响应头 - reply.raw.writeHead(200, { - 'Content-Type': 'text/event-stream', - 'Cache-Control': 'no-cache', - Connection: 'keep-alive', - 'Access-Control-Allow-Origin': '*', - }); - - // 保存用户消息 - await prisma.generalMessage.create({ - data: { - conversationId: conversation.id, - role: 
'user', - content, - metadata: { - knowledgeBaseIds, - }, - }, - }); - - // 流式输出 - const adapter = LLMFactory.getAdapter(modelType); - let fullContent = ''; - let usage: any = null; - - for await (const chunk of adapter.chatStream(messages, { - temperature: 0.7, - maxTokens: 2000, - })) { - fullContent += chunk.content; - - if (chunk.usage) { - usage = chunk.usage; - } - - // 发送SSE数据 - reply.raw.write(`data: ${JSON.stringify(chunk)}\n\n`); - } - - // 保存助手消息 - await prisma.generalMessage.create({ - data: { - conversationId: conversation.id, - role: 'assistant', - content: fullContent, - model: modelType, - tokens: usage?.totalTokens, - metadata: { - usage, - }, - }, - }); - - // 更新对话 - await prisma.generalConversation.update({ - where: { id: conversation.id }, - data: { - updatedAt: new Date(), - }, - }); - - // 发送完成信号 - reply.raw.write(`data: [DONE]\n\n`); - reply.raw.end(); - - console.log('✅ [ChatController] 对话完成'); - } catch (error: any) { - console.error('❌ [ChatController] 错误:', error); - reply.code(500).send({ - success: false, - message: error.message || '服务器错误', - }); - } - } - - /** - * 获取对话列表 - */ - async getConversations( - request: FastifyRequest, - reply: FastifyReply - ) { - try { - const userId = 'user-mock-001'; - - const conversations = await prisma.generalConversation.findMany({ - where: { - userId, - deletedAt: null, - }, - orderBy: { - updatedAt: 'desc', - }, - take: 50, - }); - - reply.send({ - success: true, - data: conversations, - }); - } catch (error: any) { - reply.code(500).send({ - success: false, - message: error.message || '获取对话列表失败', - }); - } - } - - /** - * 删除对话 - */ - async deleteConversation( - request: FastifyRequest<{ Params: { id: string } }>, - reply: FastifyReply - ) { - try { - const userId = 'user-mock-001'; - const { id } = request.params; - - await prisma.generalConversation.update({ - where: { - id, - userId, - }, - data: { - deletedAt: new Date(), - }, - }); - - reply.send({ - success: true, - message: '删除成功', - }); - } 
catch (error: any) { - reply.code(500).send({ - success: false, - message: error.message || '删除失败', - }); - } - } -} - -export const chatController = new ChatController(); - - diff --git a/backend/src/index.ts b/backend/src/index.ts index 9cd6e4ac..8ed8b9ec 100644 --- a/backend/src/index.ts +++ b/backend/src/index.ts @@ -3,11 +3,13 @@ import cors from '@fastify/cors'; import multipart from '@fastify/multipart'; import { config, validateEnv } from './config/env.js'; import { testDatabaseConnection, prisma } from './config/database.js'; -import { projectRoutes } from './routes/projects.js'; -import { agentRoutes } from './routes/agents.js'; -import { conversationRoutes } from './routes/conversations.js'; -import knowledgeBaseRoutes from './routes/knowledgeBases.js'; -import { chatRoutes } from './routes/chatRoutes.js'; +import { projectRoutes } from './legacy/routes/projects.js'; +import { agentRoutes } from './legacy/routes/agents.js'; +import { conversationRoutes } from './legacy/routes/conversations.js'; +import knowledgeBaseRoutes from './legacy/routes/knowledgeBases.js'; +import { chatRoutes } from './legacy/routes/chatRoutes.js'; +import { batchRoutes } from './legacy/routes/batchRoutes.js'; +import reviewRoutes from './legacy/routes/reviewRoutes.js'; // 全局处理BigInt序列化 @@ -93,6 +95,12 @@ await fastify.register(knowledgeBaseRoutes, { prefix: '/api/v1' }); // 注册通用对话路由 await fastify.register(chatRoutes, { prefix: '/api/v1' }); +// Phase 3: 注册批处理路由 +await fastify.register(batchRoutes, { prefix: '/api/v1' }); + +// 注册稿件审查路由 +await fastify.register(reviewRoutes, { prefix: '/api/v1' }); + // 启动服务器 const start = async () => { try { diff --git a/backend/src/controllers/agentController.ts b/backend/src/legacy/controllers/agentController.ts similarity index 100% rename from backend/src/controllers/agentController.ts rename to backend/src/legacy/controllers/agentController.ts diff --git a/backend/src/legacy/controllers/batchController.ts 
b/backend/src/legacy/controllers/batchController.ts new file mode 100644 index 00000000..ccb5b886 --- /dev/null +++ b/backend/src/legacy/controllers/batchController.ts @@ -0,0 +1,428 @@ +/** + * Phase 3: 批处理模式 - 批处理控制器 + * + * API路由: + * - POST /api/v1/batch/execute - 执行批处理任务 + * - GET /api/v1/batch/tasks/:taskId - 获取任务状态 + * - GET /api/v1/batch/tasks/:taskId/results - 获取任务结果 + * - POST /api/v1/batch/tasks/:taskId/retry-failed - 重试失败项 + */ + +import { FastifyRequest, FastifyReply } from 'fastify'; +import { executeBatchTask, retryFailedDocuments, BatchProgress } from '../services/batchService.js'; +import { prisma } from '../../config/database.js'; +import { ModelType } from '../../common/llm/adapters/types.js'; + +// ==================== 类型定义 ==================== + +interface ExecuteBatchBody { + kb_id: string; + document_ids: string[]; + template_type: 'preset' | 'custom'; + template_id?: string; + custom_prompt?: string; + model_type: ModelType; + task_name?: string; +} + +interface TaskIdParams { + taskId: string; +} + +// ==================== API处理器 ==================== + +/** + * POST /api/v1/batch/execute + * 执行批处理任务 + */ +export async function executeBatch( + request: FastifyRequest<{ Body: ExecuteBatchBody }>, + reply: FastifyReply +) { + try { + // TODO: 从JWT获取userId + const userId = 'user-mock-001'; + + const { + kb_id, + document_ids, + template_type, + template_id, + custom_prompt, + model_type, + task_name, + } = request.body; + + console.log('📦 [BatchController] 收到批处理请求', { + userId, + kbId: kb_id, + documentCount: document_ids.length, + templateType: template_type, + modelType: model_type, + }); + + // 验证参数 + if (!kb_id || !document_ids || document_ids.length === 0) { + return reply.code(400).send({ + success: false, + message: '缺少必要参数:kb_id 或 document_ids', + }); + } + + if (document_ids.length < 3) { + return reply.code(400).send({ + success: false, + message: '文献数量不能少于3篇', + }); + } + + if (document_ids.length > 50) { + return 
reply.code(400).send({ + success: false, + message: '文献数量不能超过50篇', + }); + } + + if (template_type === 'preset' && !template_id) { + return reply.code(400).send({ + success: false, + message: '预设模板类型需要提供 template_id', + }); + } + + if (template_type === 'custom' && !custom_prompt) { + return reply.code(400).send({ + success: false, + message: '自定义模板需要提供 custom_prompt', + }); + } + + // 验证模型类型 + const validModels: ModelType[] = ['deepseek-v3', 'qwen3-72b', 'qwen-long']; + if (!validModels.includes(model_type)) { + return reply.code(400).send({ + success: false, + message: `不支持的模型类型: ${model_type}`, + }); + } + + // 验证知识库是否存在 + const kb = await prisma.knowledgeBase.findUnique({ + where: { id: kb_id }, + }); + + if (!kb) { + return reply.code(404).send({ + success: false, + message: `知识库不存在: ${kb_id}`, + }); + } + + // 验证文档是否都存在 + const documents = await prisma.document.findMany({ + where: { + id: { in: document_ids }, + kbId: kb_id, + }, + }); + + if (documents.length !== document_ids.length) { + return reply.code(400).send({ + success: false, + message: `部分文档不存在或不属于该知识库`, + }); + } + + // 获取WebSocket实例(用于进度推送) + const io = (request.server as any).io; + + // 先创建任务记录获取taskId + const taskPreview = await prisma.batchTask.create({ + data: { + userId, + kbId: kb_id, + name: task_name || `批处理任务_${new Date().toLocaleString('zh-CN')}`, + templateType: template_type, + templateId: template_id || null, + prompt: custom_prompt || template_id || '', + status: 'processing', + totalDocuments: document_ids.length, + modelType: model_type, + concurrency: 3, + startedAt: new Date(), + }, + }); + + const taskId = taskPreview.id; + console.log(`✅ [BatchController] 创建任务: ${taskId}`); + + // 执行批处理任务(异步) + executeBatchTask({ + userId, + kbId: kb_id, + documentIds: document_ids, + templateType: template_type, + templateId: template_id, + customPrompt: custom_prompt, + modelType: model_type, + taskName: task_name, + existingTaskId: taskId, // 使用已创建的任务ID + onProgress: (progress: 
BatchProgress) => { + // WebSocket推送进度 + if (io) { + io.to(userId).emit('batch-progress', progress); + } + }, + }) + .then((result) => { + console.log(`🎉 [BatchController] 批处理任务完成: ${result.taskId}`); + // 推送完成事件 + if (io) { + io.to(userId).emit('batch-completed', { + task_id: result.taskId, + status: result.status, + }); + } + }) + .catch((error) => { + console.error(`❌ [BatchController] 批处理任务失败:`, error); + // 推送失败事件 + if (io) { + io.to(userId).emit('batch-failed', { + task_id: 'unknown', + error: error.message, + }); + } + }); + + // 立即返回任务ID(任务在后台执行) + reply.send({ + success: true, + message: '批处理任务已开始', + data: { + task_id: taskId, + status: 'processing', + websocket_event: 'batch-progress', + }, + }); + } catch (error: any) { + console.error('❌ [BatchController] 执行批处理失败:', error); + reply.code(500).send({ + success: false, + message: error.message || '执行批处理任务失败', + }); + } +} + +/** + * GET /api/v1/batch/tasks/:taskId + * 获取任务状态 + */ +export async function getTask( + request: FastifyRequest<{ Params: TaskIdParams }>, + reply: FastifyReply +) { + try { + const { taskId } = request.params; + + const task = await prisma.batchTask.findUnique({ + where: { id: taskId }, + select: { + id: true, + name: true, + status: true, + totalDocuments: true, + completedCount: true, + failedCount: true, + modelType: true, + startedAt: true, + completedAt: true, + durationSeconds: true, + createdAt: true, + }, + }); + + if (!task) { + return reply.code(404).send({ + success: false, + message: `任务不存在: ${taskId}`, + }); + } + + reply.send({ + success: true, + data: { + id: task.id, + name: task.name, + status: task.status, + total_documents: task.totalDocuments, + completed_count: task.completedCount, + failed_count: task.failedCount, + model_type: task.modelType, + started_at: task.startedAt, + completed_at: task.completedAt, + duration_seconds: task.durationSeconds, + created_at: task.createdAt, + }, + }); + } catch (error: any) { + console.error('❌ [BatchController] 获取任务失败:', 
error); + reply.code(500).send({ + success: false, + message: error.message || '获取任务失败', + }); + } +} + +/** + * GET /api/v1/batch/tasks/:taskId/results + * 获取任务结果 + */ +export async function getTaskResults( + request: FastifyRequest<{ Params: TaskIdParams }>, + reply: FastifyReply +) { + try { + const { taskId } = request.params; + + // 获取任务信息 + const task = await prisma.batchTask.findUnique({ + where: { id: taskId }, + include: { + results: { + include: { + document: { + select: { + filename: true, + tokensCount: true, + }, + }, + }, + orderBy: { + createdAt: 'asc', + }, + }, + }, + }); + + if (!task) { + return reply.code(404).send({ + success: false, + message: `任务不存在: ${taskId}`, + }); + } + + // 格式化结果 + const results = task.results.map((r, index) => ({ + id: r.id, + index: index + 1, + document_id: r.documentId, + document_name: r.document.filename, + status: r.status, + data: r.data, + raw_output: r.rawOutput, + error_message: r.errorMessage, + processing_time_ms: r.processingTimeMs, + tokens_used: r.tokensUsed, + created_at: r.createdAt, + })); + + reply.send({ + success: true, + data: { + task: { + id: task.id, + name: task.name, + status: task.status, + template_type: task.templateType, + template_id: task.templateId, + total_documents: task.totalDocuments, + completed_count: task.completedCount, + failed_count: task.failedCount, + duration_seconds: task.durationSeconds, + created_at: task.createdAt, + completed_at: task.completedAt, + }, + results, + }, + }); + } catch (error: any) { + console.error('❌ [BatchController] 获取任务结果失败:', error); + reply.code(500).send({ + success: false, + message: error.message || '获取任务结果失败', + }); + } +} + +/** + * POST /api/v1/batch/tasks/:taskId/retry-failed + * 重试失败的文档 + */ +export async function retryFailed( + request: FastifyRequest<{ Params: TaskIdParams }>, + reply: FastifyReply +) { + try { + const { taskId } = request.params; + const userId = 'user-mock-001'; // TODO: 从JWT获取 + + // 获取WebSocket实例 + const io = 
(request.server as any).io; + + // 执行重试(异步) + retryFailedDocuments(taskId, (progress: BatchProgress) => { + if (io) { + io.to(userId).emit('batch-progress', progress); + } + }) + .then((result) => { + console.log(`✅ [BatchController] 重试完成: ${result.retriedCount}篇`); + }) + .catch((error) => { + console.error(`❌ [BatchController] 重试失败:`, error); + }); + + reply.send({ + success: true, + message: '已开始重试失败的文档', + }); + } catch (error: any) { + console.error('❌ [BatchController] 重试失败:', error); + reply.code(500).send({ + success: false, + message: error.message || '重试失败', + }); + } +} + +/** + * GET /api/v1/batch/templates + * 获取所有预设模板 + */ +export async function getTemplates( + request: FastifyRequest, + reply: FastifyReply +) { + try { + const { getAllTemplates } = await import('../templates/clinicalResearch.js'); + const templates = getAllTemplates(); + + reply.send({ + success: true, + data: templates.map(t => ({ + id: t.id, + name: t.name, + description: t.description, + output_fields: t.outputFields, + })), + }); + } catch (error: any) { + console.error('❌ [BatchController] 获取模板失败:', error); + reply.code(500).send({ + success: false, + message: error.message || '获取模板失败', + }); + } +} + diff --git a/backend/src/legacy/controllers/chatController.ts b/backend/src/legacy/controllers/chatController.ts new file mode 100644 index 00000000..a8305800 --- /dev/null +++ b/backend/src/legacy/controllers/chatController.ts @@ -0,0 +1,566 @@ +import { FastifyRequest, FastifyReply } from 'fastify'; +import { ModelType } from '../../common/llm/adapters/types.js'; +import { LLMFactory } from '../../common/llm/adapters/LLMFactory.js'; +import * as knowledgeBaseService from '../services/knowledgeBaseService.js'; +import { prisma } from '../../config/database.js'; + +/** + * 引用信息接口 + */ +interface Citation { + id: number; + fileName: string; + position: number; + score: number; + content: string; +} + +/** + * 提取文本片段(用于引用上下文) + */ +function extractContextPreview(text: string, 
maxLength: number = 100): string { + if (!text) return ''; + + const cleaned = text.replace(/\s+/g, ' ').trim(); + if (cleaned.length <= maxLength) { + return cleaned; + } + + const truncated = cleaned.substring(0, maxLength); + const lastPunctuation = Math.max( + truncated.lastIndexOf('。'), + truncated.lastIndexOf('!'), + truncated.lastIndexOf('?'), + truncated.lastIndexOf('.'), + truncated.lastIndexOf('!'), + truncated.lastIndexOf('?') + ); + + if (lastPunctuation > maxLength * 0.5) { + return truncated.substring(0, lastPunctuation + 1); + } + + return truncated + '...'; +} + +/** + * 格式化引用清单 + */ +function formatCitations(citations: Citation[]): string { + if (citations.length === 0) return ''; + + let result = '\n\n---\n\n📚 **参考文献**\n\n'; + + for (const cite of citations) { + const scorePercent = (cite.score * 100).toFixed(0); + const preview = extractContextPreview(cite.content, 100); + + // 使用HTML span标签给引用编号添加id,方便跳转 + result += `[${cite.id}] 📄 **${cite.fileName}** - 第${cite.position}段 (相关度${scorePercent}%)\n`; + result += ` "${preview}"\n\n`; + } + + return result; +} + +interface SendChatMessageBody { + content: string; + modelType: ModelType; + knowledgeBaseIds?: string[]; + documentIds?: string[]; // Phase 2: 逐篇精读模式 - 限定文档范围 + fullTextDocumentIds?: string[]; // Phase 2: 全文阅读模式 - 传递全文 + conversationId?: string; // 可选:续接已有对话 +} + +/** + * 通用聊天Controller + * 无需项目和智能体,纯大模型对话 + */ +export class ChatController { + /** + * 发送消息(流式输出) + */ + async sendMessageStream( + request: FastifyRequest<{ Body: SendChatMessageBody }>, + reply: FastifyReply + ) { + try { + // TODO: 从JWT token获取userId + const userId = 'user-mock-001'; + + const { content, modelType, knowledgeBaseIds, documentIds, fullTextDocumentIds, conversationId } = request.body; + + console.log('💬 [ChatController] 收到通用对话请求', { + content, + modelType, + knowledgeBaseIds: knowledgeBaseIds || [], + documentIds: documentIds || [], + fullTextDocumentIds: fullTextDocumentIds || [], + conversationId, + }); + + 
// 验证modelType + if (modelType !== 'deepseek-v3' && modelType !== 'qwen3-72b' && modelType !== 'qwen-long' && modelType !== 'gemini-pro') { + reply.code(400).send({ + success: false, + message: `不支持的模型类型: ${modelType}`, + }); + return; + } + + // 获取或创建对话记录 + let conversation; + if (conversationId) { + // 验证对话是否存在且属于当前用户 + conversation = await prisma.generalConversation.findFirst({ + where: { + id: conversationId, + userId, + deletedAt: null, + }, + }); + + if (!conversation) { + reply.code(404).send({ + success: false, + message: '对话不存在', + }); + return; + } + } else { + // 创建新对话 + conversation = await prisma.generalConversation.create({ + data: { + userId, + title: content.substring(0, 50), // 用第一条消息的前50字作为标题 + modelName: modelType, + }, + }); + console.log('✅ [ChatController] 创建新对话', { conversationId: conversation.id }); + } + + // 检索知识库上下文 + let knowledgeBaseContext = ''; + const allCitations: Citation[] = []; // 存储所有引用信息 + let citationCounter = 1; // 全局引用计数器 + + // Phase 2: 全文阅读模式 - 传递完整文献全文 + if (fullTextDocumentIds && fullTextDocumentIds.length > 0) { + console.log('📚 [ChatController] 全文阅读模式 - 加载文献全文', { + documentCount: fullTextDocumentIds.length, + }); + + try { + // 获取所有选中文档的全文 + const documents = await prisma.document.findMany({ + where: { + id: { in: fullTextDocumentIds }, + }, + select: { + id: true, + filename: true, + extractedText: true, + tokensCount: true, + }, + orderBy: { + filename: 'asc', // 按文件名排序 + }, + }); + + console.log(`📄 [ChatController] 加载了 ${documents.length} 篇文献全文`); + + // 过滤掉没有extractedText的文档 + const validDocuments = documents.filter(doc => doc.extractedText && doc.extractedText.trim().length > 0); + + if (validDocuments.length === 0) { + console.warn('⚠️ [ChatController] 所有文档都没有提取文本,无法使用全文模式'); + } else if (validDocuments.length < documents.length) { + console.warn(`⚠️ [ChatController] ${documents.length - validDocuments.length} 篇文档没有提取文本,已跳过`); + } + + // 组装全文上下文,每篇文献用明确的标记分隔 + const fullTextParts: string[] = []; + + for (let i = 
0; i < validDocuments.length; i++) { + const doc = validDocuments[i]; + const docNumber = i + 1; + + // 为每篇文献添加引用信息 + allCitations.push({ + id: docNumber, + fileName: doc.filename, + position: 0, // 全文没有position概念 + score: 1.0, // 全文模式相关度100% + content: doc.extractedText?.substring(0, 200) || '(无内容)', + }); + + // 格式:【文献N:文件名】\n全文内容 + fullTextParts.push( + `【文献${docNumber}:${doc.filename}】\n\n${doc.extractedText}` + ); + } + + knowledgeBaseContext = fullTextParts.join('\n\n---\n\n'); + + const totalTokens = validDocuments.reduce((sum, doc) => sum + (doc.tokensCount || 0), 0); + + console.log(`📚 [ChatController] 全文上下文已组装`, { + totalDocuments: validDocuments.length, + totalCharacters: knowledgeBaseContext.length, + totalTokens: totalTokens, + estimatedTokens: Math.round(knowledgeBaseContext.length / 2.5), // 粗略估算 + }); + + // ⚠️ 检查Token限制(Qwen-Long输入限制:1M tokens) + const QWEN_LONG_INPUT_LIMIT = 1000000; + const SYSTEM_OVERHEAD = 10000; // 系统提示、格式等开销 + const SAFE_INPUT_LIMIT = QWEN_LONG_INPUT_LIMIT - SYSTEM_OVERHEAD; + + if (totalTokens > SAFE_INPUT_LIMIT) { + const errorMsg = `输入Token数量 (${totalTokens}) 超出Qwen-Long模型限制 (${SAFE_INPUT_LIMIT})。请减少文献数量后重试。`; + console.error(`❌ [ChatController] ${errorMsg}`); + + // 返回错误信息给前端 + reply.raw.write(`data: ${JSON.stringify({ + content: `\n\n⚠️ **Token数量超限**\n\n${errorMsg}\n\n**建议**:\n- 当前选中 ${validDocuments.length} 篇文献,共 ${totalTokens.toLocaleString()} tokens\n- 请减少到 ${Math.floor(validDocuments.length * SAFE_INPUT_LIMIT / totalTokens)} 篇以内\n- 或使用"逐篇精读"模式深入分析单篇文献`, + role: 'assistant', + error: true, + })}\n\n`); + reply.raw.write('data: [DONE]\n\n'); + return reply.raw.end(); + } + + // 警告:如果接近限制 + if (totalTokens > SAFE_INPUT_LIMIT * 0.8) { + console.warn(`⚠️ [ChatController] Token数量接近限制 (${totalTokens}/${SAFE_INPUT_LIMIT}), 建议减少文献数量`); + } + + } catch (error) { + console.error('❌ [ChatController] 加载文献全文失败:', error); + // 不throw错误,继续执行(可能没有全文也能正常对话) + } + } + // RAG检索模式(逐篇精读或通用对话) + else if (knowledgeBaseIds && 
knowledgeBaseIds.length > 0) { + console.log('📚 [ChatController] 开始检索知识库'); + const knowledgeResults: string[] = []; + + // Phase 2: 如果指定了文档ID(逐篇精读模式),需要更多结果用于过滤 + const topK = documentIds && documentIds.length > 0 ? 50 : 15; + + for (const kbId of knowledgeBaseIds) { + try { + const searchResult = await knowledgeBaseService.searchKnowledgeBase( + userId, + kbId, + content, + topK + ); + + if (searchResult.records && searchResult.records.length > 0) { + let records = searchResult.records; + + // Phase 2: 逐篇精读模式 - 过滤出指定文档的结果 + if (documentIds && documentIds.length > 0) { + console.log(`🔍 [ChatController] 逐篇精读模式 - 过滤文档`, { documentIds }); + + // 获取文档的Dify ID映射 + const documents = await prisma.document.findMany({ + where: { + id: { in: documentIds }, + knowledgeBase: { + id: kbId, + }, + }, + select: { + id: true, + filename: true, + difyDocumentId: true, + }, + }); + + const difyDocIds = documents.map(d => d.difyDocumentId).filter(Boolean); + console.log(`📄 [ChatController] 目标Dify文档ID:`, difyDocIds); + + // 过滤结果 + const beforeCount = records.length; + records = records.filter((record: any) => { + const docId = record.segment?.document?.id || record.document_id; + return docId && difyDocIds.includes(docId); + }); + + console.log(`✂️ [ChatController] 过滤结果: ${beforeCount} → ${records.length}`); + + // 如果过滤后结果太少,警告 + if (records.length === 0) { + console.warn('⚠️ [ChatController] 过滤后没有结果,可能是文档ID不匹配'); + } + + // 只取前15个 + records = records.slice(0, 15); + } + + if (records.length > 0) { + const kbInfo = await prisma.knowledgeBase.findUnique({ + where: { id: kbId }, + select: { name: true }, + }); + + // 优化格式:使用[来源N]标记,便于AI引用 + const formattedResult = `【知识库:${kbInfo?.name || '未命名'}】\n` + + records + .map((record: any) => { + const citationId = citationCounter++; + const score = (record.score * 100).toFixed(1); + + // 保存引用信息 + allCitations.push({ + id: citationId, + fileName: record.segment?.document?.name || record.document_name || '未知文档', + position: 
record.segment?.position || record.segment_position || 0, + score: record.score, + content: record.segment?.content || record.content || '', + }); + + return `[来源${citationId}] [相关度${score}%]\n${record.segment?.content || record.content}`; + }) + .join('\n\n'); + + knowledgeResults.push(formattedResult); + } + } + } catch (error) { + console.error(`❌ [ChatController] 检索知识库失败 ${kbId}:`, error); + } + } + + if (knowledgeResults.length > 0) { + knowledgeBaseContext = knowledgeResults.join('\n\n---\n\n'); + console.log(`💾 [ChatController] 知识库上下文: ${knowledgeBaseContext.length} 字符`); + console.log(`📚 [ChatController] 收集到 ${allCitations.length} 个引用`); + } + } + + // 获取历史消息(最近20条) + const historyMessages = await prisma.generalMessage.findMany({ + where: { + conversationId: conversation.id, + }, + orderBy: { + createdAt: 'desc', + }, + take: 20, + }); + historyMessages.reverse(); + console.log(`📜 [ChatController] 历史消息数: ${historyMessages.length}`); + + // 组装消息上下文 + let systemPrompt = '你是一个专业、友好的AI助手。当用户提供参考资料时,请优先基于参考资料回答。'; + + // 全文阅读模式的系统提示 + if (fullTextDocumentIds && fullTextDocumentIds.length > 0) { + systemPrompt = '你是一个专业的学术文献分析助手。用户会提供多篇文献的完整全文,每篇文献用【文献N:文件名】标记。请认真阅读所有文献,进行深入的综合分析。在回答时请引用具体文献,使用【文献N】格式。你的优势是能够看到所有文献的全貌,进行跨文献的比较、归纳和总结。'; + } + + const messages: any[] = [ + { + role: 'system', + content: systemPrompt, + }, + ]; + + // 添加历史消息 + for (const msg of historyMessages) { + messages.push({ + role: msg.role, + content: msg.content, + }); + } + + // 添加当前用户消息 + let userContent = content; + if (knowledgeBaseContext) { + // 全文阅读模式的提示 + if (fullTextDocumentIds && fullTextDocumentIds.length > 0) { + userContent = `${content}\n\n## 参考资料(文献全文)\n\n**重要提示**:下面提供的是完整的文献全文。每篇文献用【文献N:文件名】标记。请在回答时引用文献,格式如"根据【文献1】..."或"研究表明【文献2】【文献3】..."。你可以综合分析所有文献,进行跨文献的比较和总结。\n\n${knowledgeBaseContext}`; + } + // RAG检索模式的提示 + else { + userContent = `${content}\n\n## 
参考资料(来自知识库)\n\n**重要提示**:下面提供的文献片段已经用[来源N]进行了标记。请在回答中引用具体来源时使用对应的编号,如"根据[来源1]..."或"研究表明[来源3][来源5]..."。系统会在你回答结束后自动显示完整的引用清单。\n\n${knowledgeBaseContext}`; + } + } + messages.push({ + role: 'user', + content: userContent, + }); + + // 设置SSE响应头 + reply.raw.writeHead(200, { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + Connection: 'keep-alive', + 'Access-Control-Allow-Origin': '*', + }); + + // 保存用户消息 + await prisma.generalMessage.create({ + data: { + conversationId: conversation.id, + role: 'user', + content, + metadata: { + knowledgeBaseIds, + }, + }, + }); + + // 流式输出 + const adapter = LLMFactory.getAdapter(modelType); + let fullContent = ''; + let usage: any = null; + + // Phase 2: 全文阅读模式需要更大的输出空间(用于综合分析、引用等) + const maxOutputTokens = fullTextDocumentIds && fullTextDocumentIds.length > 0 + ? 6000 // 全文模式:需要更长的回答空间 + : 2000; // 其他模式:常规长度 + + console.log(`🤖 [ChatController] 开始调用LLM`, { + model: modelType, + maxOutputTokens, + mode: fullTextDocumentIds && fullTextDocumentIds.length > 0 ? 
'全文阅读' : '其他', + }); + + for await (const chunk of adapter.chatStream(messages, { + temperature: 0.7, + maxTokens: maxOutputTokens, + })) { + fullContent += chunk.content; + + if (chunk.usage) { + usage = chunk.usage; + } + + // 发送SSE数据 + reply.raw.write(`data: ${JSON.stringify(chunk)}\n\n`); + } + + // AI回答完毕后,追加引用清单 + if (allCitations.length > 0) { + console.log(`📚 [ChatController] 追加 ${allCitations.length} 个引用清单`); + const citationsText = formatCitations(allCitations); + fullContent += citationsText; + + // 将引用清单也流式输出 + const citationChunk = { + content: citationsText, + role: 'assistant' as const, + }; + reply.raw.write(`data: ${JSON.stringify(citationChunk)}\n\n`); + } + + // 保存助手消息 + await prisma.generalMessage.create({ + data: { + conversationId: conversation.id, + role: 'assistant', + content: fullContent, + model: modelType, + tokens: usage?.totalTokens, + metadata: { + usage, + }, + }, + }); + + // 更新对话 + await prisma.generalConversation.update({ + where: { id: conversation.id }, + data: { + updatedAt: new Date(), + }, + }); + + // 发送完成信号 + reply.raw.write(`data: [DONE]\n\n`); + reply.raw.end(); + + console.log('✅ [ChatController] 对话完成'); + } catch (error: any) { + console.error('❌ [ChatController] 错误:', error); + reply.code(500).send({ + success: false, + message: error.message || '服务器错误', + }); + } + } + + /** + * 获取对话列表 + */ + async getConversations( + _request: FastifyRequest, + reply: FastifyReply + ) { + try { + const userId = 'user-mock-001'; + + const conversations = await prisma.generalConversation.findMany({ + where: { + userId, + deletedAt: null, + }, + orderBy: { + updatedAt: 'desc', + }, + take: 50, + }); + + reply.send({ + success: true, + data: conversations, + }); + } catch (error: any) { + reply.code(500).send({ + success: false, + message: error.message || '获取对话列表失败', + }); + } + } + + /** + * 删除对话 + */ + async deleteConversation( + request: FastifyRequest<{ Params: { id: string } }>, + reply: FastifyReply + ) { + try { + const userId = 
'user-mock-001'; + const { id } = request.params; + + await prisma.generalConversation.update({ + where: { + id, + userId, + }, + data: { + deletedAt: new Date(), + }, + }); + + reply.send({ + success: true, + message: '删除成功', + }); + } catch (error: any) { + reply.code(500).send({ + success: false, + message: error.message || '删除失败', + }); + } + } +} + +export const chatController = new ChatController(); + + diff --git a/backend/src/controllers/conversationController.ts b/backend/src/legacy/controllers/conversationController.ts similarity index 97% rename from backend/src/controllers/conversationController.ts rename to backend/src/legacy/controllers/conversationController.ts index 13a421d2..a7fbf4c2 100644 --- a/backend/src/controllers/conversationController.ts +++ b/backend/src/legacy/controllers/conversationController.ts @@ -131,7 +131,7 @@ export class ConversationController { request.body; // 验证modelType - if (modelType !== 'deepseek-v3' && modelType !== 'qwen3-72b' && modelType !== 'gemini-pro') { + if (modelType !== 'deepseek-v3' && modelType !== 'qwen3-72b' && modelType !== 'qwen-long' && modelType !== 'gemini-pro') { reply.code(400).send({ success: false, message: `不支持的模型类型: ${modelType}`, @@ -183,7 +183,7 @@ export class ConversationController { request.body; // 验证modelType - if (modelType !== 'deepseek-v3' && modelType !== 'qwen3-72b' && modelType !== 'gemini-pro') { + if (modelType !== 'deepseek-v3' && modelType !== 'qwen3-72b' && modelType !== 'qwen-long' && modelType !== 'gemini-pro') { reply.code(400).send({ success: false, message: `不支持的模型类型: ${modelType}`, diff --git a/backend/src/controllers/documentController.ts b/backend/src/legacy/controllers/documentController.ts similarity index 80% rename from backend/src/controllers/documentController.ts rename to backend/src/legacy/controllers/documentController.ts index ef3fcaf7..1e0514d1 100644 --- a/backend/src/controllers/documentController.ts +++ b/backend/src/legacy/controllers/documentController.ts 
@@ -257,4 +257,58 @@ export async function reprocessDocument( } } +/** + * Phase 2: 获取文档全文(用于逐篇精读模式) + */ +export async function getDocumentFullText( + request: FastifyRequest<{ + Params: { + id: string; + }; + }>, + reply: FastifyReply +) { + try { + const { id } = request.params; + + const document = await documentService.getDocumentById(MOCK_USER_ID, id); + + // 返回完整的文档信息 + return reply.send({ + success: true, + data: { + documentId: document.id, + filename: document.filename, + fileType: document.fileType, + fileSizeBytes: document.fileSizeBytes, + extractedText: (document as any).extractedText || null, + charCount: (document as any).charCount || null, + tokensCount: document.tokensCount || null, + extractionMethod: (document as any).extractionMethod || null, + extractionQuality: (document as any).extractionQuality || null, + language: (document as any).language || null, + metadata: { + uploadedAt: document.uploadedAt, + processedAt: document.processedAt, + status: document.status, + }, + }, + }); + } catch (error: any) { + console.error('Failed to get document full text:', error); + + if (error.message.includes('not found')) { + return reply.status(404).send({ + success: false, + message: error.message, + }); + } + + return reply.status(500).send({ + success: false, + message: error.message || 'Failed to get document full text', + }); + } +} + diff --git a/backend/src/controllers/knowledgeBaseController.ts b/backend/src/legacy/controllers/knowledgeBaseController.ts similarity index 83% rename from backend/src/controllers/knowledgeBaseController.ts rename to backend/src/legacy/controllers/knowledgeBaseController.ts index 4d3f7a01..46a220aa 100644 --- a/backend/src/controllers/knowledgeBaseController.ts +++ b/backend/src/legacy/controllers/knowledgeBaseController.ts @@ -219,7 +219,7 @@ export async function searchKnowledgeBase( }); } - const topK = top_k ? parseInt(top_k, 10) : 3; + const topK = top_k ? 
parseInt(top_k, 10) : 15; // Phase 1优化:默认从3增加到15 const results = await knowledgeBaseService.searchKnowledgeBase( MOCK_USER_ID, @@ -289,3 +289,53 @@ export async function getKnowledgeBaseStats( } } +/** + * 获取知识库文档选择(Phase 2: 全文阅读模式) + */ +export async function getDocumentSelection( + request: FastifyRequest<{ + Params: { + id: string; + }; + Querystring: { + max_files?: string; + max_tokens?: string; + }; + }>, + reply: FastifyReply +) { + try { + const { id } = request.params; + const { max_files, max_tokens } = request.query; + + const maxFiles = max_files ? parseInt(max_files, 10) : undefined; + const maxTokens = max_tokens ? parseInt(max_tokens, 10) : undefined; + + const selection = await knowledgeBaseService.getDocumentSelection( + MOCK_USER_ID, + id, + maxFiles, + maxTokens + ); + + return reply.send({ + success: true, + data: selection, + }); + } catch (error: any) { + console.error('Failed to get document selection:', error); + + if (error.message.includes('not found')) { + return reply.status(404).send({ + success: false, + message: error.message, + }); + } + + return reply.status(500).send({ + success: false, + message: error.message || 'Failed to get document selection', + }); + } +} + diff --git a/backend/src/controllers/projectController.ts b/backend/src/legacy/controllers/projectController.ts similarity index 100% rename from backend/src/controllers/projectController.ts rename to backend/src/legacy/controllers/projectController.ts diff --git a/backend/src/legacy/controllers/reviewController.ts b/backend/src/legacy/controllers/reviewController.ts new file mode 100644 index 00000000..8336f061 --- /dev/null +++ b/backend/src/legacy/controllers/reviewController.ts @@ -0,0 +1,292 @@ +import type { FastifyRequest, FastifyReply } from 'fastify'; +import * as reviewService from '../services/reviewService.js'; +import { ModelType } from '../../common/llm/adapters/types.js'; + +// Mock用户ID(实际应从JWT token中获取) +const MOCK_USER_ID = 'user-mock-001'; + +/** + * 
上传稿件并开始审查 + * POST /api/review/upload + */ +export async function uploadManuscript( + request: FastifyRequest<{ + Body: { + modelType?: string; + }; + }>, + reply: FastifyReply +) { + try { + console.log('📤 开始上传稿件进行审查...'); + + // 获取上传的文件 + const data = await request.file(); + + if (!data) { + console.error('❌ 没有接收到文件'); + return reply.status(400).send({ + success: false, + message: 'No file uploaded', + }); + } + + console.log(`📄 接收到文件: ${data.filename}, 类型: ${data.mimetype}`); + + const file = await data.toBuffer(); + const filename = data.filename; + const fileType = data.mimetype; + const fileSizeBytes = file.length; + + // 文件大小限制(5MB,稿件通常不会太大) + const maxSize = 5 * 1024 * 1024; + console.log(`📊 文件大小: ${(fileSizeBytes / 1024 / 1024).toFixed(2)}MB (限制: 5MB)`); + + if (fileSizeBytes > maxSize) { + console.error(`❌ 文件太大: ${(fileSizeBytes / 1024 / 1024).toFixed(2)}MB`); + return reply.status(400).send({ + success: false, + message: 'File size exceeds 5MB limit', + }); + } + + // 文件类型限制(仅支持Word文档) + const allowedTypes = [ + 'application/msword', // .doc + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', // .docx + ]; + + console.log(`🔍 检查文件类型: ${fileType}`); + if (!allowedTypes.includes(fileType)) { + console.error(`❌ 不支持的文件类型: ${fileType}`); + return reply.status(400).send({ + success: false, + message: 'File type not supported. Only Word documents (.doc, .docx) are allowed', + }); + } + + // 获取模型类型(默认deepseek-v3) + const modelType = (data.fields.modelType?.value || 'deepseek-v3') as ModelType; + + // 验证模型类型 + const validModels: ModelType[] = ['deepseek-v3', 'qwen3-72b', 'qwen-long']; + if (!validModels.includes(modelType)) { + return reply.status(400).send({ + success: false, + message: `Invalid model type. 
Allowed: ${validModels.join(', ')}`, + }); + } + + console.log(`🤖 使用模型: ${modelType}`); + + // 调用服务层进行审查 + console.log('⚙️ 调用审查服务...'); + const task = await reviewService.reviewManuscript(file, filename, MOCK_USER_ID, modelType); + + console.log(`✅ 审查任务已创建: ${task.id}`); + + return reply.send({ + success: true, + message: 'Manuscript uploaded successfully. Review task created.', + data: { + taskId: task.id, + fileName: task.fileName, + status: task.status, + createdAt: task.createdAt, + }, + }); + } catch (error) { + console.error('❌ 上传稿件失败:', error); + return reply.status(500).send({ + success: false, + message: error instanceof Error ? error.message : 'Upload failed', + }); + } +} + +/** + * 获取任务状态 + * GET /api/review/tasks/:taskId + */ +export async function getTaskStatus( + request: FastifyRequest<{ + Params: { + taskId: string; + }; + }>, + reply: FastifyReply +) { + try { + const { taskId } = request.params; + console.log(`🔍 查询任务状态: ${taskId}`); + + const task = await reviewService.getReviewTask(MOCK_USER_ID, taskId); + + console.log(`✅ 任务状态: ${task.status}`); + + return reply.send({ + success: true, + data: { + id: task.id, + fileName: task.fileName, + fileSize: task.fileSize, + status: task.status, + wordCount: task.wordCount, + overallScore: task.overallScore, + modelUsed: task.modelUsed, + createdAt: task.createdAt, + startedAt: task.startedAt, + completedAt: task.completedAt, + durationSeconds: task.durationSeconds, + errorMessage: task.errorMessage, + }, + }); + } catch (error) { + console.error('❌ 查询任务状态失败:', error); + return reply.status(404).send({ + success: false, + message: error instanceof Error ? 
error.message : 'Task not found', + }); + } +} + +/** + * 获取审查报告 + * GET /api/review/tasks/:taskId/report + */ +export async function getTaskReport( + request: FastifyRequest<{ + Params: { + taskId: string; + }; + }>, + reply: FastifyReply +) { + try { + const { taskId } = request.params; + console.log(`📊 获取审查报告: ${taskId}`); + + const report = await reviewService.getReviewReport(MOCK_USER_ID, taskId); + + console.log(`✅ 报告已生成`); + + return reply.send({ + success: true, + data: report, + }); + } catch (error) { + console.error('❌ 获取报告失败:', error); + + // 如果报告尚未完成,返回202状态 + if (error instanceof Error && error.message.includes('not ready yet')) { + return reply.status(202).send({ + success: false, + message: error.message, + }); + } + + return reply.status(404).send({ + success: false, + message: error instanceof Error ? error.message : 'Report not found', + }); + } +} + +/** + * 获取任务列表 + * GET /api/review/tasks + */ +export async function getTaskList( + request: FastifyRequest<{ + Querystring: { + page?: string; + limit?: string; + }; + }>, + reply: FastifyReply +) { + try { + const page = parseInt(request.query.page || '1', 10); + const limit = parseInt(request.query.limit || '20', 10); + + console.log(`📋 获取任务列表: page=${page}, limit=${limit}`); + + const result = await reviewService.getReviewTasks(MOCK_USER_ID, page, limit); + + console.log(`✅ 找到 ${result.tasks.length} 个任务`); + + return reply.send({ + success: true, + data: result.tasks, + pagination: result.pagination, + }); + } catch (error) { + console.error('❌ 获取任务列表失败:', error); + return reply.status(500).send({ + success: false, + message: error instanceof Error ? 
error.message : 'Failed to get task list', + }); + } +} + +/** + * 删除任务 + * DELETE /api/review/tasks/:taskId + */ +export async function deleteTask( + request: FastifyRequest<{ + Params: { + taskId: string; + }; + }>, + reply: FastifyReply +) { + try { + const { taskId } = request.params; + console.log(`🗑️ 删除任务: ${taskId}`); + + await reviewService.deleteReviewTask(MOCK_USER_ID, taskId); + + console.log(`✅ 任务已删除`); + + return reply.send({ + success: true, + message: 'Task deleted successfully', + }); + } catch (error) { + console.error('❌ 删除任务失败:', error); + return reply.status(404).send({ + success: false, + message: error instanceof Error ? error.message : 'Failed to delete task', + }); + } +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/backend/src/routes/agents.ts b/backend/src/legacy/routes/agents.ts similarity index 100% rename from backend/src/routes/agents.ts rename to backend/src/legacy/routes/agents.ts diff --git a/backend/src/legacy/routes/batchRoutes.ts b/backend/src/legacy/routes/batchRoutes.ts new file mode 100644 index 00000000..929595c3 --- /dev/null +++ b/backend/src/legacy/routes/batchRoutes.ts @@ -0,0 +1,38 @@ +/** + * Phase 3: 批处理模式 - 路由配置 + */ + +import { FastifyInstance } from 'fastify'; +import { + executeBatch, + getTask, + getTaskResults, + retryFailed, + getTemplates, +} from '../controllers/batchController.js'; + +export async function batchRoutes(fastify: FastifyInstance) { + // 执行批处理任务 + fastify.post('/batch/execute', executeBatch); + + // 获取任务状态 + fastify.get('/batch/tasks/:taskId', getTask); + + // 获取任务结果 + fastify.get('/batch/tasks/:taskId/results', getTaskResults); + + // 重试失败的文档 + fastify.post('/batch/tasks/:taskId/retry-failed', retryFailed); + + // 获取所有预设模板 + fastify.get('/batch/templates', getTemplates); +} + + + + + + + + + diff --git a/backend/src/routes/chatRoutes.ts b/backend/src/legacy/routes/chatRoutes.ts similarity index 99% rename from backend/src/routes/chatRoutes.ts rename to 
backend/src/legacy/routes/chatRoutes.ts index b2886e0b..97710483 100644 --- a/backend/src/routes/chatRoutes.ts +++ b/backend/src/legacy/routes/chatRoutes.ts @@ -12,3 +12,4 @@ export async function chatRoutes(fastify: FastifyInstance) { fastify.delete('/chat/conversations/:id', chatController.deleteConversation.bind(chatController)); } + diff --git a/backend/src/routes/conversations.ts b/backend/src/legacy/routes/conversations.ts similarity index 100% rename from backend/src/routes/conversations.ts rename to backend/src/legacy/routes/conversations.ts diff --git a/backend/src/routes/knowledgeBases.ts b/backend/src/legacy/routes/knowledgeBases.ts similarity index 85% rename from backend/src/routes/knowledgeBases.ts rename to backend/src/legacy/routes/knowledgeBases.ts index 60e1d2f2..b52a775d 100644 --- a/backend/src/routes/knowledgeBases.ts +++ b/backend/src/legacy/routes/knowledgeBases.ts @@ -26,6 +26,9 @@ export default async function knowledgeBaseRoutes(fastify: FastifyInstance) { // 获取知识库统计信息 fastify.get('/knowledge-bases/:id/stats', knowledgeBaseController.getKnowledgeBaseStats); + // Phase 2: 获取文档选择(全文阅读模式) + fastify.get('/knowledge-bases/:id/document-selection', knowledgeBaseController.getDocumentSelection); + // ==================== 文档管理 API ==================== // 上传文档 @@ -37,6 +40,9 @@ export default async function knowledgeBaseRoutes(fastify: FastifyInstance) { // 获取文档详情 fastify.get('/documents/:id', documentController.getDocumentById); + // Phase 2: 获取文档全文 + fastify.get('/documents/:id/full-text', documentController.getDocumentFullText); + // 删除文档 fastify.delete('/documents/:id', documentController.deleteDocument); diff --git a/backend/src/routes/projects.ts b/backend/src/legacy/routes/projects.ts similarity index 97% rename from backend/src/routes/projects.ts rename to backend/src/legacy/routes/projects.ts index 061d4f0a..dbadea74 100644 --- a/backend/src/routes/projects.ts +++ b/backend/src/legacy/routes/projects.ts @@ -1,6 +1,6 @@ import { 
FastifyInstance, FastifyRequest, FastifyReply } from 'fastify'; import { projectController } from '../controllers/projectController.js'; -import { validateProjectCreate, validateProjectUpdate } from '../middleware/validateProject.js'; +import { validateProjectCreate, validateProjectUpdate } from '../../common/middleware/validateProject.js'; interface ProjectParams { id: string; diff --git a/backend/src/legacy/routes/reviewRoutes.ts b/backend/src/legacy/routes/reviewRoutes.ts new file mode 100644 index 00000000..e1ae3e91 --- /dev/null +++ b/backend/src/legacy/routes/reviewRoutes.ts @@ -0,0 +1,50 @@ +import type { FastifyInstance } from 'fastify'; +import * as reviewController from '../controllers/reviewController.js'; + +export default async function reviewRoutes(fastify: FastifyInstance) { + // ==================== 稿件审查 API ==================== + + // 上传稿件并开始审查 + fastify.post('/review/upload', reviewController.uploadManuscript); + + // 获取任务状态 + fastify.get('/review/tasks/:taskId', reviewController.getTaskStatus); + + // 获取审查报告 + fastify.get('/review/tasks/:taskId/report', reviewController.getTaskReport); + + // 获取任务列表 + fastify.get('/review/tasks', reviewController.getTaskList); + + // 删除任务 + fastify.delete('/review/tasks/:taskId', reviewController.deleteTask); +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/backend/src/services/agentService.ts b/backend/src/legacy/services/agentService.ts similarity index 97% rename from backend/src/services/agentService.ts rename to backend/src/legacy/services/agentService.ts index 8fe33f7a..d7249d50 100644 --- a/backend/src/services/agentService.ts +++ b/backend/src/legacy/services/agentService.ts @@ -43,8 +43,8 @@ class AgentService { constructor() { // 配置文件路径 - this.configPath = path.resolve(__dirname, '../../config/agents.yaml'); - this.promptsPath = path.resolve(__dirname, '../../prompts'); + this.configPath = path.resolve(__dirname, '../../../config/agents.yaml'); + this.promptsPath = 
path.resolve(__dirname, '../../../prompts'); // 初始化加载配置 this.loadAgents(); diff --git a/backend/src/legacy/services/batchService.ts b/backend/src/legacy/services/batchService.ts new file mode 100644 index 00000000..b1f74e8f --- /dev/null +++ b/backend/src/legacy/services/batchService.ts @@ -0,0 +1,420 @@ +/** + * Phase 3: 批处理模式 - 批处理服务 + * + * 核心功能: + * 1. 执行批处理任务(3并发) + * 2. 处理单个文档 + * 3. 进度推送(WebSocket) + * 4. 错误处理和重试 + */ + +import PQueue from 'p-queue'; +import { prisma } from '../../config/database.js'; +import { LLMFactory } from '../../common/llm/adapters/LLMFactory.js'; +import { ModelType } from '../../common/llm/adapters/types.js'; +import { getTemplate } from '../templates/clinicalResearch.js'; +import { parseJSON } from '../../common/utils/jsonParser.js'; + +export interface ExecuteBatchTaskParams { + userId: string; + kbId: string; + documentIds: string[]; + templateType: 'preset' | 'custom'; + templateId?: string; // 预设模板ID + customPrompt?: string; // 自定义提示词 + modelType: ModelType; + taskName?: string; + existingTaskId?: string; // 已存在的任务ID(可选) + onProgress?: (progress: BatchProgress) => void; +} + +export interface BatchProgress { + taskId: string; + completed: number; + total: number; + failed: number; + currentDocument?: string; + estimatedSeconds?: number; +} + +export interface BatchTaskResult { + taskId: string; + status: 'processing' | 'completed' | 'failed'; + totalDocuments: number; + completedCount: number; + failedCount: number; + durationSeconds?: number; +} + +/** + * 执行批处理任务 + */ +export async function executeBatchTask( + params: ExecuteBatchTaskParams +): Promise { + const { + userId, + kbId, + documentIds, + templateType, + templateId, + customPrompt, + modelType, + taskName, + existingTaskId, + onProgress, + } = params; + + console.log('📦 [BatchService] 开始执行批处理任务', { + userId, + kbId, + documentCount: documentIds.length, + templateType, + modelType, + existingTaskId: existingTaskId || '新建', + }); + + // 验证文献数量 (3-50篇) + if 
(documentIds.length < 3 || documentIds.length > 50) { + throw new Error(`文献数量必须在3-50篇之间,当前:${documentIds.length}篇`); + } + + // 获取模板或使用自定义提示词 + let systemPrompt: string; + let userPromptTemplate: string; + let expectedFields: string[] = []; + + if (templateType === 'preset') { + if (!templateId) { + throw new Error('预设模板类型需要提供templateId'); + } + + const template = getTemplate(templateId); + if (!template) { + throw new Error(`未找到模板: ${templateId}`); + } + + systemPrompt = template.systemPrompt; + userPromptTemplate = template.userPrompt; + expectedFields = template.outputFields.map(f => f.key); + } else { + // 自定义模板 + if (!customPrompt) { + throw new Error('自定义模板需要提供customPrompt'); + } + + systemPrompt = '你是一个专业的文献分析助手。请根据用户的要求分析文献内容。'; + userPromptTemplate = customPrompt; + } + + // 使用已存在的任务或创建新任务 + let task; + if (existingTaskId) { + task = await prisma.batchTask.findUnique({ + where: { id: existingTaskId }, + }); + if (!task) { + throw new Error(`任务不存在: ${existingTaskId}`); + } + console.log(`✅ [BatchService] 使用已存在的任务: ${task.id}`); + } else { + task = await prisma.batchTask.create({ + data: { + userId, + kbId, + name: taskName || `批处理任务_${new Date().toLocaleString('zh-CN')}`, + templateType, + templateId: templateId || null, + prompt: userPromptTemplate, + status: 'processing', + totalDocuments: documentIds.length, + completedCount: 0, + failedCount: 0, + modelType, + concurrency: 3, // 固定3并发 + startedAt: new Date(), + }, + }); + console.log(`✅ [BatchService] 创建任务记录: ${task.id}`); + } + + const startTime = Date.now(); + let completedCount = 0; + let failedCount = 0; + + // 创建并发队列(固定3并发) + const queue = new PQueue({ concurrency: 3 }); + + // 处理所有文档 + const promises = documentIds.map((docId, index) => + queue.add(async () => { + try { + console.log(`🔄 [BatchService] 处理文档 ${index + 1}/${documentIds.length}: ${docId}`); + + // 获取文档 + const document = await prisma.document.findUnique({ + where: { id: docId }, + select: { + id: true, + filename: true, + 
extractedText: true, + tokensCount: true, + }, + }); + + if (!document) { + throw new Error(`文档不存在: ${docId}`); + } + + if (!document.extractedText) { + throw new Error(`文档未提取文本: ${document.filename}`); + } + + // 调用LLM处理 + const result = await processDocument({ + document, + systemPrompt, + userPromptTemplate, + modelType, + templateType, + expectedFields, + }); + + // 保存结果 + await prisma.batchResult.create({ + data: { + taskId: task.id, + documentId: docId, + status: 'success', + data: result.data, + rawOutput: result.rawOutput, + processingTimeMs: result.processingTimeMs, + tokensUsed: result.tokensUsed, + }, + }); + + completedCount++; + console.log(`✅ [BatchService] 文档处理成功: ${document.filename} (${result.processingTimeMs}ms)`); + + } catch (error: any) { + // 处理失败 + console.error(`❌ [BatchService] 文档处理失败: ${docId}`, error); + + await prisma.batchResult.create({ + data: { + taskId: task.id, + documentId: docId, + status: 'failed', + errorMessage: error.message, + }, + }); + + failedCount++; + } + + // 推送进度 + if (onProgress) { + const progress: BatchProgress = { + taskId: task.id, + completed: completedCount + failedCount, + total: documentIds.length, + failed: failedCount, + estimatedSeconds: calculateEstimatedTime( + completedCount + failedCount, + documentIds.length, + Date.now() - startTime + ), + }; + onProgress(progress); + } + + // 更新任务进度 + await prisma.batchTask.update({ + where: { id: task.id }, + data: { + completedCount, + failedCount, + }, + }); + }) + ); + + // 等待所有任务完成 + await Promise.allSettled(promises); + + // 计算总时长 + const durationSeconds = Math.round((Date.now() - startTime) / 1000); + + // 更新任务状态 + await prisma.batchTask.update({ + where: { id: task.id }, + data: { + status: 'completed', + completedAt: new Date(), + durationSeconds, + }, + }); + + console.log(`🎉 [BatchService] 批处理任务完成: ${task.id}`, { + total: documentIds.length, + success: completedCount, + failed: failedCount, + durationSeconds, + }); + + return { + taskId: task.id, + 
status: 'completed', + totalDocuments: documentIds.length, + completedCount, + failedCount, + durationSeconds, + }; +} + +/** + * 处理单个文档 + */ +async function processDocument(params: { + document: { + id: string; + filename: string; + extractedText: string; + tokensCount: number | null; + }; + systemPrompt: string; + userPromptTemplate: string; + modelType: ModelType; + templateType: 'preset' | 'custom'; + expectedFields: string[]; +}): Promise<{ + data: any; + rawOutput: string; + processingTimeMs: number; + tokensUsed?: number; +}> { + const { + document, + systemPrompt, + userPromptTemplate, + modelType, + templateType, + expectedFields, + } = params; + + const startTime = Date.now(); + + // 构造完整的用户消息 + const userMessage = `${userPromptTemplate}\n\n【文献:${document.filename}】\n\n${document.extractedText}`; + + // 调用LLM + const adapter = LLMFactory.getAdapter(modelType); + const response = await adapter.chat( + [ + { role: 'system', content: systemPrompt }, + { role: 'user', content: userMessage }, + ], + { + temperature: 0.3, // 降低温度提高稳定性 + maxTokens: 2000, + } + ); + + const processingTimeMs = Date.now() - startTime; + const rawOutput = response.content; + + // 解析结果 + let data: any; + + if (templateType === 'preset') { + // 预设模板:解析JSON + const parseResult = parseJSON(rawOutput, expectedFields); + + if (!parseResult.success) { + throw new Error(`JSON解析失败: ${parseResult.error}`); + } + + data = parseResult.data; + } else { + // 自定义模板:直接使用文本 + data = { + extracted_text: rawOutput, + }; + } + + return { + data, + rawOutput, + processingTimeMs, + tokensUsed: response.usage?.totalTokens, + }; +} + +/** + * 计算预估剩余时间 + */ +function calculateEstimatedTime( + completed: number, + total: number, + elapsedMs: number +): number { + if (completed === 0) return 0; + + const avgTimePerDoc = elapsedMs / completed; + const remaining = total - completed; + return Math.round((avgTimePerDoc * remaining) / 1000); +} + +/** + * 重试失败的文档 + */ +export async function retryFailedDocuments( + 
taskId: string, + onProgress?: (progress: BatchProgress) => void +): Promise<{ retriedCount: number }> { + console.log(`🔄 [BatchService] 重试失败文档: ${taskId}`); + + // 获取任务信息 + const task = await prisma.batchTask.findUnique({ + where: { id: taskId }, + include: { + results: { + where: { status: 'failed' }, + }, + }, + }); + + if (!task) { + throw new Error(`任务不存在: ${taskId}`); + } + + const failedDocIds = task.results.map(r => r.documentId); + + if (failedDocIds.length === 0) { + return { retriedCount: 0 }; + } + + // 删除旧的失败记录 + await prisma.batchResult.deleteMany({ + where: { + taskId, + status: 'failed', + }, + }); + + // 重新执行 + await executeBatchTask({ + userId: task.userId, + kbId: task.kbId, + documentIds: failedDocIds, + templateType: task.templateType as 'preset' | 'custom', + templateId: task.templateId || undefined, + customPrompt: task.templateType === 'custom' ? task.prompt : undefined, + modelType: task.modelType as ModelType, + taskName: `${task.name} (重试)`, + onProgress, + }); + + return { retriedCount: failedDocIds.length }; +} + diff --git a/backend/src/services/conversationService.ts b/backend/src/legacy/services/conversationService.ts similarity index 71% rename from backend/src/services/conversationService.ts rename to backend/src/legacy/services/conversationService.ts index 9493658c..28eb4bb5 100644 --- a/backend/src/services/conversationService.ts +++ b/backend/src/legacy/services/conversationService.ts @@ -1,9 +1,78 @@ -import { prisma } from '../config/database.js'; -import { LLMFactory } from '../adapters/LLMFactory.js'; -import { Message, ModelType, StreamChunk } from '../adapters/types.js'; +import { prisma } from '../../config/database.js'; +import { LLMFactory } from '../../common/llm/adapters/LLMFactory.js'; +import { Message, ModelType, StreamChunk } from '../../common/llm/adapters/types.js'; import { agentService } from './agentService.js'; import * as knowledgeBaseService from './knowledgeBaseService.js'; +/** + * 引用信息接口 + */ +interface 
Citation { + id: number; + fileName: string; + position: number; + score: number; + content: string; +} + +/** + * 提取文本片段(用于引用上下文) + * @param text 完整文本 + * @param maxLength 最大长度(默认100字) + * @returns 提取的片段 + */ +function extractContextPreview(text: string, maxLength: number = 100): string { + if (!text) return ''; + + // 移除多余的空白字符 + const cleaned = text.replace(/\s+/g, ' ').trim(); + + // 如果文本短于限制,直接返回 + if (cleaned.length <= maxLength) { + return cleaned; + } + + // 截取前maxLength个字符,并尝试在句号、问号、感叹号处截断 + const truncated = cleaned.substring(0, maxLength); + const lastPunctuation = Math.max( + truncated.lastIndexOf('。'), + truncated.lastIndexOf('!'), + truncated.lastIndexOf('?'), + truncated.lastIndexOf('.'), + truncated.lastIndexOf('!'), + truncated.lastIndexOf('?') + ); + + // 如果找到了标点符号,在标点后截断;否则直接截断并加省略号 + if (lastPunctuation > maxLength * 0.5) { + return truncated.substring(0, lastPunctuation + 1); + } + + return truncated + '...'; +} + +/** + * 格式化引用清单 + * @param citations 引用列表 + * @returns 格式化的引用清单字符串 + */ +function formatCitations(citations: Citation[]): string { + if (citations.length === 0) return ''; + + let result = '\n\n---\n\n📚 **参考文献**\n\n'; + + for (const cite of citations) { + const scorePercent = (cite.score * 100).toFixed(0); + const preview = extractContextPreview(cite.content, 100); + + // 使用HTML span标签给引用编号添加id,方便跳转 + result += `[${cite.id}] 📄 **${cite.fileName}** - 第${cite.position}段 (相关度${scorePercent}%)\n`; + result += ` "${preview}"\n\n`; + } + + return result; +} + interface CreateConversationData { userId: string; projectId: string; @@ -187,7 +256,7 @@ export class ConversationService { } else { // 后续消息:只发送用户输入和知识库上下文(如果有) if (knowledgeBaseContext) { - userPromptContent = `${userInput}\n\n## 参考文献(来自知识库)\n${knowledgeBaseContext}`; + userPromptContent = `${userInput}\n\n## 参考文献(来自知识库)\n\n**重要提示**:下面提供的文献片段已经用[来源N]进行了标记。请在回答中引用具体来源时使用对应的编号,如"根据[来源1]..."或"研究表明[来源3][来源5]..."。系统会在你回答结束后自动显示完整的引用清单。\n\n${knowledgeBaseContext}`; console.log(`📝 
[assembleContext] 后续消息+知识库,总长度: ${userPromptContent.length}`); console.log(`📋 [assembleContext] userPromptContent预览:\n${userPromptContent.substring(0, 300)}...`); } else { @@ -233,6 +302,9 @@ export class ConversationService { // 获取知识库上下文(如果有@知识库) let knowledgeBaseContext = ''; + const allCitations: Citation[] = []; // 存储所有引用信息 + let citationCounter = 1; // 全局引用计数器 + if (knowledgeBaseIds && knowledgeBaseIds.length > 0) { const knowledgeResults: string[] = []; @@ -243,7 +315,7 @@ export class ConversationService { userId, kbId, content, - 3 // 每个知识库返回3个最相关的段落 + 15 // Phase 1优化:从3增加到15个最相关的段落 ); // 格式化检索结果 @@ -253,15 +325,27 @@ export class ConversationService { select: { name: true }, }); - knowledgeResults.push( - `【知识库:${kbInfo?.name || '未命名'}】\n` + + // 优化格式:使用[来源N]标记,便于AI引用 + const formattedResult = `【知识库:${kbInfo?.name || '未命名'}】\n` + searchResult.records - .map((record: any, index: number) => { + .map((record: any) => { + const citationId = citationCounter++; const score = (record.score * 100).toFixed(1); - return `${index + 1}. 
[相关度${score}%] ${record.segment.content}`; + + // 保存引用信息 + allCitations.push({ + id: citationId, + fileName: record.segment?.document?.name || record.document_name || '未知文档', + position: record.segment?.position || record.segment_position || 0, + score: record.score, + content: record.segment?.content || record.content || '', + }); + + return `[来源${citationId}] [相关度${score}%]\n${record.segment?.content || record.content}`; }) - .join('\n\n') - ); + .join('\n\n'); + + knowledgeResults.push(formattedResult); } } catch (error) { console.error(`Failed to search knowledge base ${kbId}:`, error); @@ -297,6 +381,13 @@ export class ConversationService { topP: modelConfig?.topP, }); + // AI回答完毕后,追加引用清单 + let finalContent = response.content; + if (allCitations.length > 0) { + const citationsText = formatCitations(allCitations); + finalContent += citationsText; + } + // 保存用户消息 const userMessage = await prisma.message.create({ data: { @@ -314,7 +405,7 @@ export class ConversationService { data: { conversationId, role: 'assistant', - content: response.content, + content: finalContent, model: response.model, tokens: response.usage?.totalTokens, metadata: { @@ -352,6 +443,9 @@ export class ConversationService { // 获取知识库上下文(如果有@知识库) console.log('📚 [sendMessageStream] 开始处理知识库', { knowledgeBaseIds }); let knowledgeBaseContext = ''; + const allCitations: Citation[] = []; // 存储所有引用信息 + let citationCounter = 1; // 全局引用计数器 + if (knowledgeBaseIds && knowledgeBaseIds.length > 0) { const knowledgeResults: string[] = []; @@ -363,7 +457,7 @@ export class ConversationService { userId, kbId, content, - 3 // 每个知识库返回3个最相关的段落 + 15 // Phase 1优化:从3增加到15个最相关的段落 ); console.log(`✅ [sendMessageStream] 检索结果`, { @@ -378,11 +472,23 @@ export class ConversationService { select: { name: true }, }); + // 优化格式:使用[来源N]标记,便于AI引用 const formattedResult = `【知识库:${kbInfo?.name || '未命名'}】\n` + searchResult.records - .map((record: any, index: number) => { + .map((record: any) => { + const citationId = 
citationCounter++; const score = (record.score * 100).toFixed(1); - return `${index + 1}. [相关度${score}%] ${record.segment.content}`; + + // 保存引用信息 + allCitations.push({ + id: citationId, + fileName: record.segment?.document?.name || record.document_name || '未知文档', + position: record.segment?.position || record.segment_position || 0, + score: record.score, + content: record.segment?.content || record.content || '', + }); + + return `[来源${citationId}] [相关度${score}%]\n${record.segment?.content || record.content}`; }) .join('\n\n'); @@ -400,7 +506,7 @@ export class ConversationService { if (knowledgeResults.length > 0) { knowledgeBaseContext = knowledgeResults.join('\n\n---\n\n'); console.log(`💾 [sendMessageStream] 知识库上下文总长度: ${knowledgeBaseContext.length} 字符`); - console.log(`📋 [sendMessageStream] 知识库上下文预览:\n${knowledgeBaseContext.substring(0, 500)}...`); + console.log(`📚 [sendMessageStream] 收集到 ${allCitations.length} 个引用`); } else { console.warn('⚠️ [sendMessageStream] 没有构建任何知识库上下文'); } @@ -455,6 +561,19 @@ export class ConversationService { yield chunk; } + // AI回答完毕后,追加引用清单 + if (allCitations.length > 0) { + console.log(`📚 [sendMessageStream] 追加 ${allCitations.length} 个引用清单`); + const citationsText = formatCitations(allCitations); + fullContent += citationsText; + + // 将引用清单也流式输出 + yield { + content: citationsText, + done: false, + }; + } + // 流式输出完成后,保存助手回复 await prisma.message.create({ data: { diff --git a/backend/src/services/documentService.ts b/backend/src/legacy/services/documentService.ts similarity index 80% rename from backend/src/services/documentService.ts rename to backend/src/legacy/services/documentService.ts index 62a29c18..f7552c67 100644 --- a/backend/src/services/documentService.ts +++ b/backend/src/legacy/services/documentService.ts @@ -1,5 +1,6 @@ -import { prisma } from '../config/database.js'; -import { difyClient } from '../clients/DifyClient.js'; +import { prisma } from '../../config/database.js'; +import { difyClient } from 
'../../common/rag/DifyClient.js'; +import { extractionClient } from '../../common/document/ExtractionClient.js'; /** * 文档服务 @@ -54,32 +55,64 @@ export async function uploadDocument( }); try { - // 4. 上传到Dify + // 4. Phase 2: 调用提取服务提取文本内容 + let extractionResult; + let extractedText = ''; + let extractionMethod = ''; + let extractionQuality: number | null = null; + let charCount: number | null = null; + let detectedLanguage: string | null = null; + + try { + console.log(`[Phase2] 开始提取文档: ${filename}`); + extractionResult = await extractionClient.extractDocument(file, filename); + + if (extractionResult.success) { + extractedText = extractionResult.text; + extractionMethod = extractionResult.method; + extractionQuality = extractionResult.quality || null; + charCount = extractionResult.metadata?.char_count || null; + detectedLanguage = extractionResult.language || null; + + console.log(`[Phase2] 提取成功: method=${extractionMethod}, chars=${charCount}, language=${detectedLanguage}`); + } + } catch (extractionError) { + console.error('[Phase2] 文档提取失败,但继续上传到Dify:', extractionError); + // 提取失败不影响Dify上传,但记录错误 + } + + // 5. 上传到Dify const difyResult = await difyClient.uploadDocumentDirectly( knowledgeBase.difyDatasetId, file, filename ); - // 5. 更新文档记录(更新difyDocumentId和状态) + // 6. 更新文档记录(更新difyDocumentId、状态和Phase 2字段) const updatedDocument = await prisma.document.update({ where: { id: document.id }, data: { difyDocumentId: difyResult.document.id, status: difyResult.document.indexing_status, progress: 50, + // Phase 2新增字段 + extractedText: extractedText || null, + extractionMethod: extractionMethod || null, + extractionQuality: extractionQuality, + charCount: charCount, + language: detectedLanguage, }, }); - // 6. 启动后台轮询任务,等待处理完成 + // 7. 启动后台轮询任务,等待处理完成 pollDocumentStatus(userId, kbId, document.id, difyResult.document.id).catch(error => { console.error('Failed to poll document status:', error); }); - // 7. 更新知识库统计 + // 8. 更新知识库统计 await updateKnowledgeBaseStats(kbId); - // 8. 
转换BigInt为Number + // 9. 转换BigInt为Number return { ...updatedDocument, fileSizeBytes: Number(updatedDocument.fileSizeBytes), diff --git a/backend/src/services/knowledgeBaseService.ts b/backend/src/legacy/services/knowledgeBaseService.ts similarity index 73% rename from backend/src/services/knowledgeBaseService.ts rename to backend/src/legacy/services/knowledgeBaseService.ts index 3e7ee5d9..8f2043d7 100644 --- a/backend/src/services/knowledgeBaseService.ts +++ b/backend/src/legacy/services/knowledgeBaseService.ts @@ -1,5 +1,6 @@ -import { prisma } from '../config/database.js'; -import { difyClient } from '../clients/DifyClient.js'; +import { prisma } from '../../config/database.js'; +import { difyClient } from '../../common/rag/DifyClient.js'; +import { calculateDocumentTokens, selectDocumentsForFullText, TOKEN_LIMITS } from './tokenService.js'; /** * 知识库服务 @@ -194,7 +195,7 @@ export async function searchKnowledgeBase( userId: string, kbId: string, query: string, - topK: number = 3 + topK: number = 15 // Phase 1优化:默认从3增加到15 ) { console.log('🔍 [searchKnowledgeBase] 开始检索', { kbId, query, topK }); @@ -288,3 +289,76 @@ export async function getKnowledgeBaseStats(userId: string, kbId: string) { return stats; } +/** + * 获取知识库文档选择(用于全文阅读模式) + * Phase 2新增:根据Token限制选择文档 + */ +export async function getDocumentSelection( + userId: string, + kbId: string, + maxFiles?: number, + maxTokens?: number +) { + // 1. 验证权限 + const knowledgeBase = await prisma.knowledgeBase.findFirst({ + where: { id: kbId, userId }, + include: { + documents: { + where: { + status: 'completed', // 只选择已完成的文档 + }, + select: { + id: true, + filename: true, + extractedText: true, + charCount: true, + extractionMethod: true, + tokensCount: true, + fileSizeBytes: true, + }, + orderBy: { uploadedAt: 'desc' }, + }, + }, + }); + + if (!knowledgeBase) { + throw new Error('Knowledge base not found or access denied'); + } + + // 2. 
计算每个文档的Token数 + const documentTokens = calculateDocumentTokens(knowledgeBase.documents); + + // 3. 选择文档(根据Token限制) + const selection = selectDocumentsForFullText( + documentTokens, + maxFiles || TOKEN_LIMITS.MAX_FILES, + maxTokens || TOKEN_LIMITS.MAX_TOTAL_TOKENS + ); + + // 4. 返回结果 + return { + knowledgeBaseId: kbId, + knowledgeBaseName: knowledgeBase.name, + limits: { + maxFiles: maxFiles || TOKEN_LIMITS.MAX_FILES, + maxTokens: maxTokens || TOKEN_LIMITS.MAX_TOTAL_TOKENS, + }, + selection: { + selectedCount: selection.totalFiles, + selectedTokens: selection.totalTokens, + excludedCount: selection.excludedDocuments.length, + availableTokens: selection.availableTokens, + reason: selection.reason, + }, + selectedDocuments: selection.selectedDocuments.map(doc => ({ + ...doc, + // 查找原始文档信息 + ...knowledgeBase.documents.find(d => d.id === doc.documentId), + })), + excludedDocuments: selection.excludedDocuments.map(doc => ({ + ...doc, + // 查找原始文档信息 + ...knowledgeBase.documents.find(d => d.id === doc.documentId), + })), + }; +} diff --git a/backend/src/services/projectService.ts b/backend/src/legacy/services/projectService.ts similarity index 97% rename from backend/src/services/projectService.ts rename to backend/src/legacy/services/projectService.ts index 7ed26993..ff86bab3 100644 --- a/backend/src/services/projectService.ts +++ b/backend/src/legacy/services/projectService.ts @@ -1,4 +1,4 @@ -import { prisma } from '../config/database.js'; +import { prisma } from '../../config/database.js'; export interface CreateProjectDTO { name: string; diff --git a/backend/src/legacy/services/reviewService.ts b/backend/src/legacy/services/reviewService.ts new file mode 100644 index 00000000..5cedfb77 --- /dev/null +++ b/backend/src/legacy/services/reviewService.ts @@ -0,0 +1,452 @@ +import { prisma } from '../../config/database.js'; +import { extractionClient } from '../../common/document/ExtractionClient.js'; +import { LLMFactory } from '../../common/llm/adapters/LLMFactory.js'; 
+import { ModelType } from '../../common/llm/adapters/types.js'; +import fs from 'fs/promises'; +import path from 'path'; +import { fileURLToPath } from 'url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); + +/** + * 稿件审查服务 + */ + +// ==================== 类型定义 ==================== + +export interface EditorialItem { + criterion: string; + status: 'pass' | 'warning' | 'fail'; + score: number; + issues: string[]; + suggestions: string[]; +} + +export interface EditorialReview { + overall_score: number; + summary: string; + items: EditorialItem[]; +} + +export interface MethodologyIssue { + type: string; + severity: 'major' | 'minor'; + description: string; + location: string; + suggestion: string; +} + +export interface MethodologyPart { + part: string; + score: number; + issues: MethodologyIssue[]; +} + +export interface MethodologyReview { + overall_score: number; + summary: string; + parts: MethodologyPart[]; +} + +// ==================== 主要功能函数 ==================== + +/** + * 审查稿件(主入口函数) + * @param file 文件Buffer + * @param filename 文件名 + * @param userId 用户ID + * @param modelType 模型类型 + * @returns 审查任务 + */ +export async function reviewManuscript( + file: Buffer, + filename: string, + userId: string, + modelType: ModelType = 'deepseek-v3' +) { + const startTime = Date.now(); + + // 1. 创建任务记录 + const task = await prisma.reviewTask.create({ + data: { + userId, + fileName: filename, + fileSize: file.length, + extractedText: '', // 初始为空 + status: 'pending', + modelUsed: modelType, + startedAt: new Date(), + }, + }); + + try { + // 2. 提取文档文本(异步执行,不阻塞响应) + processReviewTask(task.id, file, filename, userId, modelType, startTime).catch(error => { + console.error(`[ReviewService] Task ${task.id} failed:`, error); + }); + + return task; + } catch (error) { + // 如果任务创建失败,更新状态 + await prisma.reviewTask.update({ + where: { id: task.id }, + data: { + status: 'failed', + errorMessage: error instanceof Error ? 
error.message : 'Unknown error', + }, + }); + throw error; + } +} + +/** + * 处理审查任务(后台异步执行) + */ +async function processReviewTask( + taskId: string, + file: Buffer, + filename: string, + userId: string, + modelType: ModelType, + startTime: number +) { + try { + // 1. 更新状态为extracting + await prisma.reviewTask.update({ + where: { id: taskId }, + data: { status: 'extracting' }, + }); + + // 2. 提取文档文本 + console.log(`[ReviewService] 开始提取文档: ${filename}`); + const extractionResult = await extractionClient.extractDocument(file, filename); + + if (!extractionResult.success || !extractionResult.text) { + throw new Error('文档提取失败或内容为空'); + } + + const extractedText = extractionResult.text; + const wordCount = extractionResult.metadata?.char_count || extractedText.length; + + console.log(`[ReviewService] 提取成功: ${wordCount} 字符`); + + // 更新提取的文本 + await prisma.reviewTask.update({ + where: { id: taskId }, + data: { + extractedText, + wordCount, + status: 'reviewing_editorial', + }, + }); + + // 3. 执行稿约规范性评估 + console.log(`[ReviewService] 开始稿约规范性评估...`); + const editorialReview = await reviewEditorialStandards(extractedText, modelType); + + await prisma.reviewTask.update({ + where: { id: taskId }, + data: { + editorialReview: editorialReview as any, + status: 'reviewing_methodology', + }, + }); + + // 4. 执行方法学评估 + console.log(`[ReviewService] 开始方法学评估...`); + const methodologyReview = await reviewMethodology(extractedText, modelType); + + // 5. 计算总体评分(加权平均:稿约40% + 方法学60%) + const overallScore = editorialReview.overall_score * 0.4 + methodologyReview.overall_score * 0.6; + + // 6. 
完成任务 + const endTime = Date.now(); + const durationSeconds = Math.floor((endTime - startTime) / 1000); + + await prisma.reviewTask.update({ + where: { id: taskId }, + data: { + methodologyReview: methodologyReview as any, + overallScore, + status: 'completed', + completedAt: new Date(), + durationSeconds, + }, + }); + + console.log(`[ReviewService] 任务完成: ${taskId}, 总分: ${overallScore.toFixed(1)}, 耗时: ${durationSeconds}s`); + } catch (error) { + console.error(`[ReviewService] 任务处理失败:`, error); + + // 更新任务状态为failed + await prisma.reviewTask.update({ + where: { id: taskId }, + data: { + status: 'failed', + errorMessage: error instanceof Error ? error.message : 'Unknown error', + }, + }); + } +} + +/** + * 稿约规范性评估 + * @param text 稿件文本 + * @param modelType 模型类型 + * @returns 评估结果 + */ +export async function reviewEditorialStandards( + text: string, + modelType: ModelType = 'deepseek-v3' +): Promise { + try { + // 1. 读取系统Prompt + const promptPath = path.join(__dirname, '../../../prompts/review_editorial_system.txt'); + const systemPrompt = await fs.readFile(promptPath, 'utf-8'); + + // 2. 构建消息 + const messages = [ + { role: 'system' as const, content: systemPrompt }, + { role: 'user' as const, content: `请对以下稿件进行稿约规范性评估:\n\n${text}` }, + ]; + + // 3. 调用LLM + console.log(`[ReviewService] 开始调用 ${modelType} 进行稿约规范性评估...`); + const llmAdapter = LLMFactory.getAdapter(modelType); + const response = await llmAdapter.chat(messages, { + temperature: 0.3, // 较低温度以获得更稳定的评估 + maxTokens: 8000, // 增加token限制,确保完整输出 + }); + console.log(`[ReviewService] ${modelType} 稿约规范性评估完成,响应长度: ${response.content.length}`); + + // 4. 解析JSON响应 + const result = parseJSONFromLLMResponse(response.content); + + // 5. 
验证响应格式 + if (!result || typeof result.overall_score !== 'number' || !Array.isArray(result.items)) { + throw new Error('LLM返回的数据格式不正确'); + } + + return result; + } catch (error) { + console.error('[ReviewService] 稿约规范性评估失败:', error); + if (error instanceof Error) { + console.error('[ReviewService] 错误详情:', { + message: error.message, + stack: error.stack, + }); + } + throw new Error(`稿约规范性评估失败: ${error instanceof Error ? error.message : 'Unknown error'}`); + } +} + +/** + * 方法学评估 + * @param text 稿件文本 + * @param modelType 模型类型 + * @returns 评估结果 + */ +export async function reviewMethodology( + text: string, + modelType: ModelType = 'deepseek-v3' +): Promise { + try { + // 1. 读取系统Prompt + const promptPath = path.join(__dirname, '../../../prompts/review_methodology_system.txt'); + const systemPrompt = await fs.readFile(promptPath, 'utf-8'); + + // 2. 构建消息 + const messages = [ + { role: 'system' as const, content: systemPrompt }, + { role: 'user' as const, content: `请对以下稿件进行方法学评估:\n\n${text}` }, + ]; + + // 3. 调用LLM + console.log(`[ReviewService] 开始调用 ${modelType} 进行方法学评估...`); + const llmAdapter = LLMFactory.getAdapter(modelType); + const response = await llmAdapter.chat(messages, { + temperature: 0.3, + maxTokens: 8000, // 增加token限制,确保完整输出 + }); + console.log(`[ReviewService] ${modelType} 方法学评估完成,响应长度: ${response.content.length}`); + + // 4. 解析JSON响应 + const result = parseJSONFromLLMResponse(response.content); + + // 5. 验证响应格式 + if (!result || typeof result.overall_score !== 'number' || !Array.isArray(result.parts)) { + throw new Error('LLM返回的数据格式不正确'); + } + + return result; + } catch (error) { + console.error('[ReviewService] 方法学评估失败:', error); + if (error instanceof Error) { + console.error('[ReviewService] 错误详情:', { + message: error.message, + stack: error.stack, + }); + } + throw new Error(`方法学评估失败: ${error instanceof Error ? 
error.message : 'Unknown error'}`); + } +} + +/** + * 从LLM响应中解析JSON + * 支持多种格式:纯JSON、```json代码块、混合文本 + */ +function parseJSONFromLLMResponse(content: string): T { + try { + // 1. 尝试直接解析 + return JSON.parse(content) as T; + } catch { + // 2. 尝试提取```json代码块 + const jsonMatch = content.match(/```json\s*\n?([\s\S]*?)\n?```/); + if (jsonMatch) { + try { + return JSON.parse(jsonMatch[1].trim()) as T; + } catch { + // 继续尝试其他方法 + } + } + + // 3. 尝试提取{}或[]包裹的内容 + const objectMatch = content.match(/(\{[\s\S]*\})/); + if (objectMatch) { + try { + return JSON.parse(objectMatch[1]) as T; + } catch { + // 继续尝试其他方法 + } + } + + const arrayMatch = content.match(/(\[[\s\S]*\])/); + if (arrayMatch) { + try { + return JSON.parse(arrayMatch[1]) as T; + } catch { + // 失败 + } + } + + // 4. 所有尝试都失败 + throw new Error('无法从LLM响应中解析JSON'); + } +} + +// ==================== 任务查询与管理 ==================== + +/** + * 获取任务详情 + */ +export async function getReviewTask(userId: string, taskId: string) { + const task = await prisma.reviewTask.findFirst({ + where: { + id: taskId, + userId, + }, + }); + + if (!task) { + throw new Error('Task not found or access denied'); + } + + return task; +} + +/** + * 获取用户的所有审查任务 + */ +export async function getReviewTasks(userId: string, page: number = 1, limit: number = 20) { + const skip = (page - 1) * limit; + + const [tasks, total] = await Promise.all([ + prisma.reviewTask.findMany({ + where: { userId }, + orderBy: { createdAt: 'desc' }, + skip, + take: limit, + select: { + id: true, + fileName: true, + fileSize: true, + status: true, + overallScore: true, + modelUsed: true, + createdAt: true, + completedAt: true, + durationSeconds: true, + wordCount: true, + }, + }), + prisma.reviewTask.count({ + where: { userId }, + }), + ]); + + return { + tasks, + pagination: { + page, + limit, + total, + totalPages: Math.ceil(total / limit), + }, + }; +} + +/** + * 删除审查任务 + */ +export async function deleteReviewTask(userId: string, taskId: string) { + const task = await 
/**
 * Fetch the report (the complete evaluation result) of a review task.
 *
 * @param userId - Owner of the task; used as part of the lookup filter.
 * @param taskId - Identifier of the review task.
 * @returns The report fields copied from the task record.
 * @throws Error when no task matches both `taskId` and `userId`, or when the
 *   task status is anything other than `'completed'`.
 */
export async function getReviewReport(userId: string, taskId: string) {
  // Filtering on id AND userId means a task owned by someone else is
  // indistinguishable from a missing one.
  const task = await prisma.reviewTask.findFirst({
    where: {
      id: taskId,
      userId,
    },
  });

  if (!task) {
    throw new Error('Task not found or access denied');
  }

  if (task.status !== 'completed') {
    throw new Error('Report is not ready yet. Task status: ' + task.status);
  }

  return {
    taskId: task.id,
    fileName: task.fileName,
    wordCount: task.wordCount,
    modelUsed: task.modelUsed,
    overallScore: task.overallScore,
    editorialReview: task.editorialReview,
    methodologyReview: task.methodologyReview,
    completedAt: task.completedAt,
    durationSeconds: task.durationSeconds,
  };
}

// ---- backend/src/legacy/services/tokenService.ts ----
import { encoding_for_model, Tiktoken } from 'tiktoken';

/**
 * Token counting service.
 * Used for token budgeting in full-text reading mode.
 */

/** Token limit configuration. */
export const TOKEN_LIMITS = {
  MAX_FILES: 50, // at most 50 files
  MAX_TOTAL_TOKENS: 980000, // at most 980K tokens (leaves 20K headroom in Qwen-Long's 1M context)
  CONTEXT_RESERVE: 20000, // tokens reserved for the system prompt and the user query
};

// Lazily created encoder, shared by every call until cleanup() releases it.
let encoderCache: Tiktoken | null = null;

/**
 * Return the shared encoder, creating it on first use.
 *
 * The gpt-4 encoding is used as a stand-in for Qwen's tokenizer, so counts
 * are approximations for Qwen models (original author's assumption that the
 * two tokenizers are similar).
 */
function getEncoder(): Tiktoken {
  if (!encoderCache) {
    encoderCache = encoding_for_model('gpt-4');
  }
  return encoderCache;
}

/**
 * Count the tokens in a piece of text.
 *
 * @param text - Text to measure.
 * @returns 0 for empty or whitespace-only text; otherwise the encoder's token
 *   count, or a character-based estimate if encoding throws.
 */
export function countTokens(text: string): number {
  if (!text || text.trim().length === 0) {
    return 0;
  }

  try {
    const encoder = getEncoder();
    const tokens = encoder.encode(text);
    return tokens.length;
  } catch (error) {
    console.error('[TokenService] Failed to count tokens:', error);
    // Fallback: rough estimate (about 1.5 chars/token for Chinese,
    // about 4 chars/token for everything else).
    const chineseChars = (text.match(/[\u4e00-\u9fff]/g) || []).length;
    const totalChars = text.length;
    const englishChars = totalChars - chineseChars;

    return Math.ceil(chineseChars / 1.5 + englishChars / 4);
  }
}

/**
 * Count tokens for several texts.
 *
 * @param texts - Texts to measure.
 * @returns One count per input, in the same order.
 */
export function countTokensBatch(texts: string[]): number[] {
  return texts.map(text => countTokens(text));
}

/** Token statistics for one document. */
export interface DocumentTokenInfo {
  documentId: string;
  filename: string;
  charCount: number;
  estimatedTokens: number;
  extractionMethod?: string;
}

/**
 * Compute token statistics for a list of documents.
 *
 * Uses the extracted text when present; otherwise estimates from `charCount`
 * at an assumed average of 2.5 characters per token (mixed Chinese/English).
 * Documents with neither get 0 tokens.
 *
 * @param documents - Documents with optional extracted text and char count.
 * @returns One `DocumentTokenInfo` per input document, in the same order.
 */
export function calculateDocumentTokens(
  documents: Array<{
    id: string;
    filename: string;
    extractedText?: string | null;
    charCount?: number | null;
    extractionMethod?: string | null;
  }>
): DocumentTokenInfo[] {
  return documents.map(doc => {
    let estimatedTokens = 0;

    if (doc.extractedText) {
      estimatedTokens = countTokens(doc.extractedText);
    } else if (doc.charCount) {
      estimatedTokens = Math.ceil(doc.charCount / 2.5);
    }

    return {
      documentId: doc.id,
      filename: doc.filename,
      // When no char count was stored but text is available, report the text
      // length instead of 0 so the two statistics do not contradict each other.
      charCount: doc.charCount || doc.extractedText?.length || 0,
      estimatedTokens,
      extractionMethod: doc.extractionMethod || undefined,
    };
  });
}

/** Outcome of selecting documents under the file-count and token limits. */
export interface DocumentSelectionResult {
  selectedDocuments: DocumentTokenInfo[];
  totalTokens: number;
  totalFiles: number;
  excludedDocuments: DocumentTokenInfo[];
  reason: 'all_included' | 'file_limit' | 'token_limit';
  availableTokens: number;
}

/**
 * Select documents so that the file-count and token limits are respected.
 *
 * Strategy: documents are considered in ascending token order (smallest
 * first); each one is selected unless the file limit is already reached or
 * adding it would exceed the token limit.
 *
 * @param documents - Candidate documents; the array is not mutated.
 * @param maxFiles - Maximum number of selected documents.
 * @param maxTokens - Maximum total of `estimatedTokens` across the selection.
 * @returns The selected and excluded documents, totals, the limit that caused
 *   exclusions (`'all_included'` when nothing was excluded) and the unused
 *   token budget.
 */
export function selectDocumentsForFullText(
  documents: DocumentTokenInfo[],
  maxFiles: number = TOKEN_LIMITS.MAX_FILES,
  maxTokens: number = TOKEN_LIMITS.MAX_TOTAL_TOKENS
): DocumentSelectionResult {
  const sortedDocs = [...documents].sort(
    (a, b) => a.estimatedTokens - b.estimatedTokens
  );

  const selected: DocumentTokenInfo[] = [];
  const excluded: DocumentTokenInfo[] = [];
  let totalTokens = 0;

  for (const doc of sortedDocs) {
    const fileLimitReached = selected.length >= maxFiles;
    const overTokenBudget = totalTokens + doc.estimatedTokens > maxTokens;

    if (fileLimitReached || overTokenBudget) {
      excluded.push(doc);
      continue;
    }

    selected.push(doc);
    totalTokens += doc.estimatedTokens;
  }

  let reason: DocumentSelectionResult['reason'] = 'all_included';
  if (excluded.length > 0) {
    reason = selected.length >= maxFiles ? 'file_limit' : 'token_limit';
  }

  return {
    selectedDocuments: selected,
    totalTokens,
    totalFiles: selected.length,
    excludedDocuments: excluded,
    reason,
    availableTokens: maxTokens - totalTokens,
  };
}

/**
 * Estimate the tokens a query needs, including room for the response.
 *
 * @param query - The user query.
 * @param systemPrompt - Optional system prompt, counted as well when given.
 * @param responseReserve - Tokens set aside for the model's response
 *   (defaults to 2000, the value previously hard-coded here).
 * @returns Query tokens + system prompt tokens + `responseReserve`.
 */
export function estimateQueryTokens(
  query: string,
  systemPrompt?: string,
  responseReserve: number = 2000
): number {
  let total = countTokens(query);

  if (systemPrompt) {
    total += countTokens(systemPrompt);
  }

  total += responseReserve;

  return total;
}

/**
 * Check whether documents plus query fit into the token limit.
 *
 * @param documentsTokens - Tokens used by the documents.
 * @param queryTokens - Tokens used by the query.
 * @param maxTokens - Token limit to check against.
 * @returns The combined total, the limit, the remaining budget (negative when
 *   over the limit) and `withinLimit`, which is true when `remaining >= 0`.
 */
export function checkTokenLimit(
  documentsTokens: number,
  queryTokens: number,
  maxTokens: number = TOKEN_LIMITS.MAX_TOTAL_TOKENS
): {
  withinLimit: boolean;
  totalTokens: number;
  maxTokens: number;
  remaining: number;
} {
  const totalTokens = documentsTokens + queryTokens;
  const remaining = maxTokens - totalTokens;

  return {
    withinLimit: remaining >= 0,
    totalTokens,
    maxTokens,
    remaining,
  };
}

/**
 * Release the cached encoder, if one exists. Safe to call repeatedly; the
 * next countTokens() call creates a fresh encoder.
 */
export function cleanup() {
  if (encoderCache) {
    encoderCache.free();
    encoderCache = null;
  }
}

// Release the encoder when the process exits.
if (typeof process !== 'undefined') {
  process.on('exit', cleanup);
  process.on('SIGINT', () => {
    cleanup();
    // 128 + SIGINT(2): report the interruption instead of a success status.
    process.exit(130);
  });
}
/**
 * Phase 3: batch mode - clinical research information extraction template.
 *
 * Extracts 8 core fields from a clinical study:
 * 1. Research purpose
 * 2. Research design
 * 3. Research subjects
 * 4. Sample size (text type, keeps the wording of the source document)
 * 5. Intervention group
 * 6. Control group
 * 7. Results and data
 * 8. Oxford rating (detailed criteria are given in the prompt)
 */

/** One output column produced by a batch template. */
export interface TemplateField {
  key: string;
  label: string;
  type: 'text' | 'longtext' | 'number';
  description?: string;
}

/** A preset extraction template: its output fields plus the prompts sent to the model. */
export interface BatchTemplate {
  id: string;
  name: string;
  description: string;
  outputFields: TemplateField[];
  systemPrompt: string;
  userPrompt: string;
}

/**
 * Template for extracting structured information from clinical research papers.
 *
 * The keys in `outputFields` match the keys of the JSON object that
 * `userPrompt` asks the model to return.
 */
export const CLINICAL_RESEARCH_TEMPLATE: BatchTemplate = {
  id: 'clinical_research',
  name: '临床研究信息提取',
  description: '提取研究目的、设计、对象、样本量、干预、对照、结果、证据等级',

  outputFields: [
    {
      key: 'research_purpose',
      label: '研究目的',
      type: 'text',
      description: '研究想要解决的问题或验证的假设'
    },
    {
      key: 'research_design',
      label: '研究设计',
      type: 'text',
      description: '研究类型(RCT、队列研究等)'
    },
    {
      key: 'research_subjects',
      label: '研究对象',
      type: 'text',
      description: '纳入/排除标准、人群特征'
    },
    {
      key: 'sample_size',
      label: '样本量',
      type: 'text', // text on purpose: keeps the source document's own wording
      description: '实际纳入的受试者人数'
    },
    {
      key: 'intervention_group',
      label: '干预组',
      type: 'text',
      description: '实验组的干预措施'
    },
    {
      key: 'control_group',
      label: '对照组',
      type: 'text',
      description: '对照组的情况'
    },
    {
      key: 'results_data',
      label: '结果及数据',
      type: 'longtext',
      description: '主要结局指标的具体数据'
    },
    {
      key: 'oxford_level',
      label: '牛津评级',
      type: 'text',
      description: '证据等级(1a-5)'
    },
  ],

  systemPrompt: `你是一个专业的临床研究数据提取助手。
你的任务是从临床研究文献中提取结构化信息。
你的回答必须严格遵循JSON格式,不要有任何额外的文字说明。`,

  // NOTE(review): the level table in item 8 does not obviously match the
  // published Oxford CEBM levels of evidence (e.g. what levels 4 and 5 stand
  // for) - confirm with a domain owner before relying on the ratings.
  userPrompt: `请仔细阅读这篇临床研究文献,提取以下信息:

1. **研究目的**:本研究想要解决什么问题或验证什么假设?用1-2句话概括。

2. **研究设计**:研究类型,如随机对照试验(RCT)、队列研究、病例对照研究、横断面研究、系统评价/Meta分析等。

3. **研究对象**:描述纳入标准、排除标准、人群特征(年龄、性别、疾病状态等)。

4. **样本量**:实际纳入的受试者人数,保留原文描述(如"干预组156人,对照组152人,共308人")。

5. **干预组**:实验组接受的治疗或干预措施,包括药物名称、剂量、给药方式、疗程等。

6. **对照组**:对照组的情况,如安慰剂、标准治疗、空白对照等。

7. **结果及数据**:主要结局指标的具体数据、统计结果、P值、置信区间等。包括基线数据对比和终点数据对比。

8. **牛津评级**:根据研究设计判断证据等级,参考以下标准:
   - **1a**:系统评价/Meta分析(多个RCT的汇总分析)
   - **1b**:单个随机对照试验(RCT)
   - **2a**:设计良好的对照研究(无随机化)
   - **2b**:设计良好的准实验研究(队列研究、病例对照研究)
   - **3a**:描述性研究(横断面研究、病例系列)
   - **3b**:个案报告(单一病例)
   - **4**:专家意见、共识声明
   - **5**:基础研究(动物实验、体外研究)

请严格按照以下JSON格式输出,不要有任何额外说明或前言:
{
  "research_purpose": "...",
  "research_design": "...",
  "research_subjects": "...",
  "sample_size": "...",
  "intervention_group": "...",
  "control_group": "...",
  "results_data": "...",
  "oxford_level": "..."
}`,
};

/** All preset templates, keyed by template id. */
export const PRESET_TEMPLATES: Record<string, BatchTemplate> = {
  [CLINICAL_RESEARCH_TEMPLATE.id]: CLINICAL_RESEARCH_TEMPLATE,
};

/**
 * Look up a preset template by id.
 *
 * @param templateId - Template id, e.g. `'clinical_research'`.
 * @returns The matching template, or `null` when no preset has that id.
 */
export function getTemplate(templateId: string): BatchTemplate | null {
  // Only own keys count: a plain index lookup would also resolve inherited
  // Object.prototype members such as 'toString' or 'constructor'.
  if (!Object.prototype.hasOwnProperty.call(PRESET_TEMPLATES, templateId)) {
    return null;
  }
  return PRESET_TEMPLATES[templateId] ?? null;
}

/**
 * List every preset template.
 *
 * @returns A new array containing all values of `PRESET_TEMPLATES`.
 */
export function getAllTemplates(): BatchTemplate[] {
  return Object.values(PRESET_TEMPLATES);
}