refactor(backend): incremental architecture evolution (Task 19)
- Add common/ layer for shared capabilities (LLM, RAG, document, middleware)
- Add legacy/ layer for existing business code
- Move files to new structure (controllers, routes, services)
- Update index.ts for new route registration
- System remains fully functional
This commit is contained in:
264
backend/package-lock.json
generated
264
backend/package-lock.json
generated
@@ -18,8 +18,12 @@
|
||||
"dotenv": "^17.2.3",
|
||||
"fastify": "^5.6.1",
|
||||
"form-data": "^4.0.4",
|
||||
"html2canvas": "^1.4.1",
|
||||
"js-yaml": "^4.1.0",
|
||||
"jspdf": "^3.0.3",
|
||||
"p-queue": "^9.0.0",
|
||||
"prisma": "^6.17.0",
|
||||
"tiktoken": "^1.0.22",
|
||||
"zod": "^4.1.12"
|
||||
},
|
||||
"devDependencies": {
|
||||
@@ -32,6 +36,15 @@
|
||||
"typescript": "^5.9.3"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/runtime": {
|
||||
"version": "7.28.4",
|
||||
"resolved": "https://registry.npmmirror.com/@babel/runtime/-/runtime-7.28.4.tgz",
|
||||
"integrity": "sha512-Q/N6JNWvIvPnLDvjlE1OUBLPQHH6l3CltCEsHIujp45zQUSSh8K+gHnaEX45yAT1nyngnINhvWtzN+Nb9D8RAQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@cspotcode/source-map-support": {
|
||||
"version": "0.8.1",
|
||||
"resolved": "https://registry.npmmirror.com/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz",
|
||||
@@ -861,6 +874,26 @@
|
||||
"undici-types": "~7.14.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/pako": {
|
||||
"version": "2.0.4",
|
||||
"resolved": "https://registry.npmmirror.com/@types/pako/-/pako-2.0.4.tgz",
|
||||
"integrity": "sha512-VWDCbrLeVXJM9fihYodcLiIv0ku+AlOa/TQ1SvYOaBuyrSKgEcro95LJyIsJ4vSo6BXIxOKxiJAat04CmST9Fw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/raf": {
|
||||
"version": "3.4.3",
|
||||
"resolved": "https://registry.npmmirror.com/@types/raf/-/raf-3.4.3.tgz",
|
||||
"integrity": "sha512-c4YAvMedbPZ5tEyxzQdMoOhhJ4RD3rngZIdwC2/qDN3d7JpEhB6fiBRKVY1lg5B7Wk+uPBjn5f39j1/2MY1oOw==",
|
||||
"license": "MIT",
|
||||
"optional": true
|
||||
},
|
||||
"node_modules/@types/trusted-types": {
|
||||
"version": "2.0.7",
|
||||
"resolved": "https://registry.npmmirror.com/@types/trusted-types/-/trusted-types-2.0.7.tgz",
|
||||
"integrity": "sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==",
|
||||
"license": "MIT",
|
||||
"optional": true
|
||||
},
|
||||
"node_modules/abstract-logging": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmmirror.com/abstract-logging/-/abstract-logging-2.0.1.tgz",
|
||||
@@ -1008,6 +1041,15 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/base64-arraybuffer": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmmirror.com/base64-arraybuffer/-/base64-arraybuffer-1.0.2.tgz",
|
||||
"integrity": "sha512-I3yl4r9QB5ZRY3XuJVEPfc2XhZO6YweFPI+UovAzn+8/hb3oJ6lnysaFcjVpkCPfVWFUDvoZ8kmVDP7WyRtYtQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 0.6.0"
|
||||
}
|
||||
},
|
||||
"node_modules/binary-extensions": {
|
||||
"version": "2.3.0",
|
||||
"resolved": "https://registry.npmmirror.com/binary-extensions/-/binary-extensions-2.3.0.tgz",
|
||||
@@ -1104,6 +1146,26 @@
|
||||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/canvg": {
|
||||
"version": "3.0.11",
|
||||
"resolved": "https://registry.npmmirror.com/canvg/-/canvg-3.0.11.tgz",
|
||||
"integrity": "sha512-5ON+q7jCTgMp9cjpu4Jo6XbvfYwSB2Ow3kzHKfIyJfaCAOHLbdKPQqGKgfED/R5B+3TFFfe8pegYA+b423SRyA==",
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"@babel/runtime": "^7.12.5",
|
||||
"@types/raf": "^3.4.0",
|
||||
"core-js": "^3.8.3",
|
||||
"raf": "^3.4.1",
|
||||
"regenerator-runtime": "^0.13.7",
|
||||
"rgbcolor": "^1.0.1",
|
||||
"stackblur-canvas": "^2.0.0",
|
||||
"svg-pathdata": "^6.0.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/chokidar": {
|
||||
"version": "4.0.3",
|
||||
"resolved": "https://registry.npmmirror.com/chokidar/-/chokidar-4.0.3.tgz",
|
||||
@@ -1178,6 +1240,18 @@
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/core-js": {
|
||||
"version": "3.46.0",
|
||||
"resolved": "https://registry.npmmirror.com/core-js/-/core-js-3.46.0.tgz",
|
||||
"integrity": "sha512-vDMm9B0xnqqZ8uSBpZ8sNtRtOdmfShrvT6h2TuQGLs0Is+cR0DYbj/KWP6ALVNbWPpqA/qPLoOuppJN07humpA==",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/core-js"
|
||||
}
|
||||
},
|
||||
"node_modules/create-require": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmmirror.com/create-require/-/create-require-1.1.1.tgz",
|
||||
@@ -1185,6 +1259,15 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/css-line-break": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmmirror.com/css-line-break/-/css-line-break-2.1.0.tgz",
|
||||
"integrity": "sha512-FHcKFCZcAha3LwfVBhCQbW2nCNbkZXn7KVUJcsT5/P8YmfsVja0FMPJr0B903j/E69HUphKiV9iQArX8SDYA4w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"utrie": "^1.0.2"
|
||||
}
|
||||
},
|
||||
"node_modules/dateformat": {
|
||||
"version": "4.6.3",
|
||||
"resolved": "https://registry.npmmirror.com/dateformat/-/dateformat-4.6.3.tgz",
|
||||
@@ -1262,6 +1345,16 @@
|
||||
"node": ">=0.3.1"
|
||||
}
|
||||
},
|
||||
"node_modules/dompurify": {
|
||||
"version": "3.3.0",
|
||||
"resolved": "https://registry.npmmirror.com/dompurify/-/dompurify-3.3.0.tgz",
|
||||
"integrity": "sha512-r+f6MYR1gGN1eJv0TVQbhA7if/U7P87cdPl3HN5rikqaBSBxLiCb/b9O+2eG0cxz0ghyU+mU1QkbsOwERMYlWQ==",
|
||||
"license": "(MPL-2.0 OR Apache-2.0)",
|
||||
"optional": true,
|
||||
"optionalDependencies": {
|
||||
"@types/trusted-types": "^2.0.7"
|
||||
}
|
||||
},
|
||||
"node_modules/dotenv": {
|
||||
"version": "17.2.3",
|
||||
"resolved": "https://registry.npmmirror.com/dotenv/-/dotenv-17.2.3.tgz",
|
||||
@@ -1413,6 +1506,12 @@
|
||||
"@esbuild/win32-x64": "0.25.10"
|
||||
}
|
||||
},
|
||||
"node_modules/eventemitter3": {
|
||||
"version": "5.0.1",
|
||||
"resolved": "https://registry.npmmirror.com/eventemitter3/-/eventemitter3-5.0.1.tgz",
|
||||
"integrity": "sha512-GWkBvjiSZK87ELrYOSESUYeVIc9mvLLf/nXalMOS5dYrgZq9o5OVkbZAVM06CVxYsCwH9BDZFPlQTlPA1j4ahA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/exsolve": {
|
||||
"version": "1.0.7",
|
||||
"resolved": "https://registry.npmmirror.com/exsolve/-/exsolve-1.0.7.tgz",
|
||||
@@ -1499,6 +1598,17 @@
|
||||
"node": ">=20"
|
||||
}
|
||||
},
|
||||
"node_modules/fast-png": {
|
||||
"version": "6.4.0",
|
||||
"resolved": "https://registry.npmmirror.com/fast-png/-/fast-png-6.4.0.tgz",
|
||||
"integrity": "sha512-kAqZq1TlgBjZcLr5mcN6NP5Rv4V2f22z00c3g8vRrwkcqjerx7BEhPbOnWCPqaHUl2XWQBJQvOT/FQhdMT7X/Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/pako": "^2.0.3",
|
||||
"iobuffer": "^5.3.2",
|
||||
"pako": "^2.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/fast-querystring": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmmirror.com/fast-querystring/-/fast-querystring-1.1.2.tgz",
|
||||
@@ -1621,6 +1731,12 @@
|
||||
"xtend": "^4.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/fflate": {
|
||||
"version": "0.8.2",
|
||||
"resolved": "https://registry.npmmirror.com/fflate/-/fflate-0.8.2.tgz",
|
||||
"integrity": "sha512-cPJU47OaAoCbg0pBvzsgpTPhmhqI5eJjh/JIu8tPj5q+T7iLvW/JAYUqmE7KOB4R1ZyEhzBaIQpQpardBF5z8A==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/fill-range": {
|
||||
"version": "7.1.1",
|
||||
"resolved": "https://registry.npmmirror.com/fill-range/-/fill-range-7.1.1.tgz",
|
||||
@@ -1856,6 +1972,19 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/html2canvas": {
|
||||
"version": "1.4.1",
|
||||
"resolved": "https://registry.npmmirror.com/html2canvas/-/html2canvas-1.4.1.tgz",
|
||||
"integrity": "sha512-fPU6BHNpsyIhr8yyMpTLLxAbkaK8ArIBcmZIRiBLiDhjeqvXolaEmDGmELFuX9I4xDcaKKcJl+TKZLqruBbmWA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"css-line-break": "^2.1.0",
|
||||
"text-segmentation": "^1.0.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/ignore-by-default": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmmirror.com/ignore-by-default/-/ignore-by-default-1.0.1.tgz",
|
||||
@@ -1869,6 +1998,12 @@
|
||||
"integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/iobuffer": {
|
||||
"version": "5.4.0",
|
||||
"resolved": "https://registry.npmmirror.com/iobuffer/-/iobuffer-5.4.0.tgz",
|
||||
"integrity": "sha512-DRebOWuqDvxunfkNJAlc3IzWIPD5xVxwUNbHr7xKB8E6aLJxIPfNX3CoMJghcFjpv6RWQsrcJbghtEwSPoJqMA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ipaddr.js": {
|
||||
"version": "2.2.0",
|
||||
"resolved": "https://registry.npmmirror.com/ipaddr.js/-/ipaddr.js-2.2.0.tgz",
|
||||
@@ -1980,6 +2115,23 @@
|
||||
"integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/jspdf": {
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmmirror.com/jspdf/-/jspdf-3.0.3.tgz",
|
||||
"integrity": "sha512-eURjAyz5iX1H8BOYAfzvdPfIKK53V7mCpBTe7Kb16PaM8JSXEcUQNBQaiWMI8wY5RvNOPj4GccMjTlfwRBd+oQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/runtime": "^7.26.9",
|
||||
"fast-png": "^6.2.0",
|
||||
"fflate": "^0.8.1"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"canvg": "^3.0.11",
|
||||
"core-js": "^3.6.0",
|
||||
"dompurify": "^3.2.4",
|
||||
"html2canvas": "^1.0.0-rc.5"
|
||||
}
|
||||
},
|
||||
"node_modules/light-my-request": {
|
||||
"version": "6.6.0",
|
||||
"resolved": "https://registry.npmmirror.com/light-my-request/-/light-my-request-6.6.0.tgz",
|
||||
@@ -2232,6 +2384,40 @@
|
||||
"wrappy": "1"
|
||||
}
|
||||
},
|
||||
"node_modules/p-queue": {
|
||||
"version": "9.0.0",
|
||||
"resolved": "https://registry.npmmirror.com/p-queue/-/p-queue-9.0.0.tgz",
|
||||
"integrity": "sha512-KO1RyxstL9g1mK76530TExamZC/S2Glm080Nx8PE5sTd7nlduDQsAfEl4uXX+qZjLiwvDauvzXavufy3+rJ9zQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"eventemitter3": "^5.0.1",
|
||||
"p-timeout": "^7.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/p-timeout": {
|
||||
"version": "7.0.1",
|
||||
"resolved": "https://registry.npmmirror.com/p-timeout/-/p-timeout-7.0.1.tgz",
|
||||
"integrity": "sha512-AxTM2wDGORHGEkPCt8yqxOTMgpfbEHqF51f/5fJCmwFC3C/zNcGT63SymH2ttOAaiIws2zVg4+izQCjrakcwHg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=20"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/pako": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmmirror.com/pako/-/pako-2.1.0.tgz",
|
||||
"integrity": "sha512-w+eufiZ1WuJYgPXbV/PO3NCMEc3xqylkKHzp8bxp1uW4qaSNQUkwmLLEc3kKsfz8lpV1F8Ht3U1Cm+9Srog2ug==",
|
||||
"license": "(MIT AND Zlib)"
|
||||
},
|
||||
"node_modules/pathe": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmmirror.com/pathe/-/pathe-2.0.3.tgz",
|
||||
@@ -2244,6 +2430,13 @@
|
||||
"integrity": "sha512-xCy9V055GLEqoFaHoC1SoLIaLmWctgCUaBaWxDZ7/Zx4CTyX7cJQLJOok/orfjZAh9kEYpjJa4d0KcJmCbctZA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/performance-now": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmmirror.com/performance-now/-/performance-now-2.1.0.tgz",
|
||||
"integrity": "sha512-7EAHlyLHI56VEIdK57uwHdHKIaAGbnXPiw0yWbarQZOKaKpvUIgW0jWRVLiatnM+XXlSwsanIBH/hzGMJulMow==",
|
||||
"license": "MIT",
|
||||
"optional": true
|
||||
},
|
||||
"node_modules/picomatch": {
|
||||
"version": "2.3.1",
|
||||
"resolved": "https://registry.npmmirror.com/picomatch/-/picomatch-2.3.1.tgz",
|
||||
@@ -2417,6 +2610,16 @@
|
||||
"integrity": "sha512-tYC1Q1hgyRuHgloV/YXs2w15unPVh8qfu/qCTfhTYamaw7fyhumKa2yGpdSo87vY32rIclj+4fWYQXUMs9EHvg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/raf": {
|
||||
"version": "3.4.1",
|
||||
"resolved": "https://registry.npmmirror.com/raf/-/raf-3.4.1.tgz",
|
||||
"integrity": "sha512-Sq4CW4QhwOHE8ucn6J34MqtZCeWFP2aQSmrlroYgqAV1PjStIhJXxYuTgUIfkEk7zTLjmIjLmU5q+fbD1NnOJA==",
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"performance-now": "^2.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/rc9": {
|
||||
"version": "2.1.2",
|
||||
"resolved": "https://registry.npmmirror.com/rc9/-/rc9-2.1.2.tgz",
|
||||
@@ -2449,6 +2652,13 @@
|
||||
"node": ">= 12.13.0"
|
||||
}
|
||||
},
|
||||
"node_modules/regenerator-runtime": {
|
||||
"version": "0.13.11",
|
||||
"resolved": "https://registry.npmmirror.com/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz",
|
||||
"integrity": "sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==",
|
||||
"license": "MIT",
|
||||
"optional": true
|
||||
},
|
||||
"node_modules/require-from-string": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmmirror.com/require-from-string/-/require-from-string-2.0.2.tgz",
|
||||
@@ -2493,6 +2703,16 @@
|
||||
"integrity": "sha512-q1b3N5QkRUWUl7iyylaaj3kOpIT0N2i9MqIEQXP73GVsN9cw3fdx8X63cEmWhJGi2PPCF23Ijp7ktmd39rawIA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/rgbcolor": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmmirror.com/rgbcolor/-/rgbcolor-1.0.1.tgz",
|
||||
"integrity": "sha512-9aZLIrhRaD97sgVhtJOW6ckOEh6/GnvQtdVNfdZ6s67+3/XwLS9lBcQYzEEhYVeUowN7pRzMLsyGhK2i/xvWbw==",
|
||||
"license": "MIT OR SEE LICENSE IN FEEL-FREE.md",
|
||||
"optional": true,
|
||||
"engines": {
|
||||
"node": ">= 0.8.15"
|
||||
}
|
||||
},
|
||||
"node_modules/safe-buffer": {
|
||||
"version": "5.2.1",
|
||||
"resolved": "https://registry.npmmirror.com/safe-buffer/-/safe-buffer-5.2.1.tgz",
|
||||
@@ -2618,6 +2838,16 @@
|
||||
"node": ">= 10.x"
|
||||
}
|
||||
},
|
||||
"node_modules/stackblur-canvas": {
|
||||
"version": "2.7.0",
|
||||
"resolved": "https://registry.npmmirror.com/stackblur-canvas/-/stackblur-canvas-2.7.0.tgz",
|
||||
"integrity": "sha512-yf7OENo23AGJhBriGx0QivY5JP6Y1HbrrDI6WLt6C5auYZXlQrheoY8hD4ibekFKz1HOfE48Ww8kMWMnJD/zcQ==",
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"engines": {
|
||||
"node": ">=0.1.14"
|
||||
}
|
||||
},
|
||||
"node_modules/steed": {
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmmirror.com/steed/-/steed-1.1.3.tgz",
|
||||
@@ -2657,6 +2887,25 @@
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/svg-pathdata": {
|
||||
"version": "6.0.3",
|
||||
"resolved": "https://registry.npmmirror.com/svg-pathdata/-/svg-pathdata-6.0.3.tgz",
|
||||
"integrity": "sha512-qsjeeq5YjBZ5eMdFuUa4ZosMLxgr5RZ+F+Y1OrDhuOCEInRMA3x74XdBtggJcj9kOeInz0WE+LgCPDkZFlBYJw==",
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"engines": {
|
||||
"node": ">=12.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/text-segmentation": {
|
||||
"version": "1.0.3",
|
||||
"resolved": "https://registry.npmmirror.com/text-segmentation/-/text-segmentation-1.0.3.tgz",
|
||||
"integrity": "sha512-iOiPUo/BGnZ6+54OsWxZidGCsdU8YbE4PSpdPinp7DeMtUJNJBoJ/ouUSTJjHkh1KntHaltHl/gDs2FC4i5+Nw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"utrie": "^1.0.2"
|
||||
}
|
||||
},
|
||||
"node_modules/thread-stream": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmmirror.com/thread-stream/-/thread-stream-3.1.0.tgz",
|
||||
@@ -2666,6 +2915,12 @@
|
||||
"real-require": "^0.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/tiktoken": {
|
||||
"version": "1.0.22",
|
||||
"resolved": "https://registry.npmmirror.com/tiktoken/-/tiktoken-1.0.22.tgz",
|
||||
"integrity": "sha512-PKvy1rVF1RibfF3JlXBSP0Jrcw2uq3yXdgcEXtKTYn3QJ/cBRBHDnrJ5jHky+MENZ6DIPwNUGWpkVx+7joCpNA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/tinyexec": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmmirror.com/tinyexec/-/tinyexec-1.0.1.tgz",
|
||||
@@ -2795,6 +3050,15 @@
|
||||
"integrity": "sha512-QQiYxHuyZ9gQUIrmPo3IA+hUl4KYk8uSA7cHrcKd/l3p1OTpZcM0Tbp9x7FAtXdAYhlasd60ncPpgu6ihG6TOA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/utrie": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmmirror.com/utrie/-/utrie-1.0.2.tgz",
|
||||
"integrity": "sha512-1MLa5ouZiOmQzUbjbu9VmjLzn1QLXBhwpUa7kdLUQK+KQ5KA9I1vk5U4YHe/X2Ch7PYnJfWuWT+VbuxbGwljhw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"base64-arraybuffer": "^1.0.2"
|
||||
}
|
||||
},
|
||||
"node_modules/v8-compile-cache-lib": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmmirror.com/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz",
|
||||
|
||||
@@ -11,8 +11,12 @@
|
||||
"prisma:generate": "prisma generate",
|
||||
"prisma:migrate": "prisma migrate dev",
|
||||
"prisma:studio": "prisma studio",
|
||||
"prisma:seed": "tsx prisma/seed.ts",
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"prisma": {
|
||||
"seed": "tsx prisma/seed.ts"
|
||||
},
|
||||
"keywords": [
|
||||
"ai",
|
||||
"clinical",
|
||||
@@ -31,8 +35,12 @@
|
||||
"dotenv": "^17.2.3",
|
||||
"fastify": "^5.6.1",
|
||||
"form-data": "^4.0.4",
|
||||
"html2canvas": "^1.4.1",
|
||||
"js-yaml": "^4.1.0",
|
||||
"jspdf": "^3.0.3",
|
||||
"p-queue": "^9.0.0",
|
||||
"prisma": "^6.17.0",
|
||||
"tiktoken": "^1.0.22",
|
||||
"zod": "^4.1.12"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
||||
@@ -8,6 +8,7 @@ generator client {
|
||||
datasource db {
|
||||
provider = "postgresql"
|
||||
url = env("DATABASE_URL")
|
||||
schemas = ["platform_schema", "aia_schema", "pkb_schema", "asl_schema", "common_schema", "dc_schema", "rvw_schema", "admin_schema", "ssa_schema", "st_schema", "public"]
|
||||
}
|
||||
|
||||
// ==================== 用户模块 ====================
|
||||
@@ -38,11 +39,15 @@ model User {
|
||||
documents Document[]
|
||||
adminLogs AdminLog[]
|
||||
generalConversations GeneralConversation[]
|
||||
batchTasks BatchTask[] // Phase 3: 批处理任务
|
||||
taskTemplates TaskTemplate[] // Phase 3: 任务模板
|
||||
reviewTasks ReviewTask[] // 稿件审查任务
|
||||
|
||||
@@index([email])
|
||||
@@index([status])
|
||||
@@index([createdAt])
|
||||
@@map("users")
|
||||
@@schema("platform_schema")
|
||||
}
|
||||
|
||||
// ==================== 项目模块 ====================
|
||||
@@ -66,6 +71,7 @@ model Project {
|
||||
@@index([createdAt])
|
||||
@@index([deletedAt])
|
||||
@@map("projects")
|
||||
@@schema("aia_schema")
|
||||
}
|
||||
|
||||
// ==================== 对话模块 ====================
|
||||
@@ -95,6 +101,7 @@ model Conversation {
|
||||
@@index([createdAt])
|
||||
@@index([deletedAt])
|
||||
@@map("conversations")
|
||||
@@schema("aia_schema")
|
||||
}
|
||||
|
||||
model Message {
|
||||
@@ -115,6 +122,7 @@ model Message {
|
||||
@@index([createdAt])
|
||||
@@index([isPinned])
|
||||
@@map("messages")
|
||||
@@schema("aia_schema")
|
||||
}
|
||||
|
||||
// ==================== 知识库模块 ====================
|
||||
@@ -133,10 +141,12 @@ model KnowledgeBase {
|
||||
|
||||
user User @relation(fields: [userId], references: [id], onDelete: Cascade)
|
||||
documents Document[]
|
||||
batchTasks BatchTask[] // Phase 3: 批处理任务
|
||||
|
||||
@@index([userId])
|
||||
@@index([difyDatasetId])
|
||||
@@map("knowledge_bases")
|
||||
@@schema("pkb_schema")
|
||||
}
|
||||
|
||||
model Document {
|
||||
@@ -154,17 +164,122 @@ model Document {
|
||||
segmentsCount Int? @map("segments_count")
|
||||
tokensCount Int? @map("tokens_count")
|
||||
|
||||
// Phase 2: 全文阅读模式新增字段
|
||||
extractionMethod String? @map("extraction_method") // pymupdf/nougat/mammoth/direct
|
||||
extractionQuality Float? @map("extraction_quality") // 0-1质量分数
|
||||
charCount Int? @map("char_count") // 字符数
|
||||
language String? // 检测到的语言 (chinese/english)
|
||||
extractedText String? @map("extracted_text") @db.Text // 提取的文本内容
|
||||
|
||||
uploadedAt DateTime @default(now()) @map("uploaded_at")
|
||||
processedAt DateTime? @map("processed_at")
|
||||
|
||||
knowledgeBase KnowledgeBase @relation(fields: [kbId], references: [id], onDelete: Cascade)
|
||||
user User @relation(fields: [userId], references: [id], onDelete: Cascade)
|
||||
batchResults BatchResult[] // Phase 3: 批处理结果
|
||||
|
||||
@@index([kbId])
|
||||
@@index([userId])
|
||||
@@index([status])
|
||||
@@index([difyDocumentId])
|
||||
@@index([extractionMethod])
|
||||
@@map("documents")
|
||||
@@schema("pkb_schema")
|
||||
}
|
||||
|
||||
// ==================== Phase 3: 批处理模块 ====================
|
||||
|
||||
// Batch-processing task: one prompt run against the documents of one knowledge base.
|
||||
model BatchTask {
|
||||
id String @id @default(uuid())
|
||||
userId String @map("user_id")
|
||||
kbId String @map("kb_id")
|
||||
|
||||
// Basic task information
|
||||
name String // Task name (user-customizable)
|
||||
templateType String @map("template_type") // 'preset' | 'custom'
|
||||
templateId String? @map("template_id") // Preset template ID (e.g. 'clinical_research')
|
||||
prompt String @db.Text // Prompt (the complete text)
|
||||
|
||||
// Execution status
|
||||
status String // 'processing' | 'completed' | 'failed' | 'paused'
|
||||
totalDocuments Int @map("total_documents")
|
||||
completedCount Int @default(0) @map("completed_count")
|
||||
failedCount Int @default(0) @map("failed_count")
|
||||
|
||||
// Configuration
|
||||
modelType String @map("model_type") // Model used
|
||||
concurrency Int @default(3) // Fixed at 3
|
||||
|
||||
// Timing statistics
|
||||
startedAt DateTime? @map("started_at")
|
||||
completedAt DateTime? @map("completed_at")
|
||||
durationSeconds Int? @map("duration_seconds") // Execution duration (seconds)
|
||||
|
||||
// Relations (rows are removed when the owning user or knowledge base is deleted: onDelete: Cascade)
|
||||
results BatchResult[]
|
||||
user User @relation(fields: [userId], references: [id], onDelete: Cascade)
|
||||
knowledgeBase KnowledgeBase @relation(fields: [kbId], references: [id], onDelete: Cascade)
|
||||
|
||||
createdAt DateTime @default(now()) @map("created_at")
|
||||
updatedAt DateTime @updatedAt @map("updated_at")
|
||||
|
||||
@@index([userId])
|
||||
@@index([kbId])
|
||||
@@index([status])
|
||||
@@index([createdAt])
|
||||
@@map("batch_tasks")
|
||||
@@schema("pkb_schema")
|
||||
}
|
||||
|
||||
// Batch-processing result (one row per document)
|
||||
model BatchResult {
|
||||
id String @id @default(uuid())
|
||||
taskId String @map("task_id")
|
||||
documentId String @map("document_id")
|
||||
|
||||
// Execution result
|
||||
status String // 'success' | 'failed'
|
||||
data Json? // Extracted structured data (preset template) or text (custom)
|
||||
rawOutput String? @db.Text @map("raw_output") // Raw AI output (backup)
|
||||
errorMessage String? @db.Text @map("error_message") // Error message
|
||||
|
||||
// Performance metrics
|
||||
processingTimeMs Int? @map("processing_time_ms") // Processing time (milliseconds)
|
||||
tokensUsed Int? @map("tokens_used") // Token usage
|
||||
|
||||
// Relations (rows are removed when the parent task or document is deleted: onDelete: Cascade)
|
||||
task BatchTask @relation(fields: [taskId], references: [id], onDelete: Cascade)
|
||||
document Document @relation(fields: [documentId], references: [id], onDelete: Cascade)
|
||||
|
||||
createdAt DateTime @default(now()) @map("created_at")
|
||||
|
||||
@@index([taskId])
|
||||
@@index([documentId])
|
||||
@@index([status])
|
||||
@@map("batch_results")
|
||||
@@schema("pkb_schema")
|
||||
}
|
||||
|
||||
// Task template (reserved; not implemented yet)
|
||||
model TaskTemplate {
|
||||
id String @id @default(uuid())
|
||||
userId String @map("user_id")
|
||||
|
||||
name String
|
||||
description String?
|
||||
prompt String @db.Text
|
||||
outputFields Json // Definition of the expected output fields
|
||||
isPublic Boolean @default(false) @map("is_public")
|
||||
|
||||
// Owning user; templates are removed with the user (onDelete: Cascade)
|
||||
user User @relation(fields: [userId], references: [id], onDelete: Cascade)
|
||||
|
||||
createdAt DateTime @default(now()) @map("created_at")
|
||||
updatedAt DateTime @updatedAt @map("updated_at")
|
||||
|
||||
@@index([userId])
|
||||
@@map("task_templates")
|
||||
@@schema("pkb_schema")
|
||||
}
|
||||
|
||||
// ==================== 运营管理模块 ====================
|
||||
@@ -187,6 +302,7 @@ model AdminLog {
|
||||
@@index([createdAt])
|
||||
@@index([action])
|
||||
@@map("admin_logs")
|
||||
@@schema("public")
|
||||
}
|
||||
|
||||
// ==================== 通用对话模块 ====================
|
||||
@@ -208,6 +324,7 @@ model GeneralConversation {
|
||||
@@index([createdAt])
|
||||
@@index([updatedAt])
|
||||
@@map("general_conversations")
|
||||
@@schema("aia_schema")
|
||||
}
|
||||
|
||||
model GeneralMessage {
|
||||
@@ -226,4 +343,51 @@ model GeneralMessage {
|
||||
@@index([conversationId])
|
||||
@@index([createdAt])
|
||||
@@map("general_messages")
|
||||
@@schema("aia_schema")
|
||||
}
|
||||
|
||||
// ==================== 稿件审查模块 ====================
|
||||
|
||||
// Manuscript review task
|
||||
model ReviewTask {
|
||||
id String @id @default(uuid())
|
||||
userId String @map("user_id")
|
||||
|
||||
// File information
|
||||
fileName String @map("file_name")
|
||||
fileSize Int @map("file_size")
|
||||
filePath String? @map("file_path")
|
||||
|
||||
// Document content
|
||||
extractedText String @map("extracted_text") @db.Text
|
||||
wordCount Int? @map("word_count")
|
||||
|
||||
// Execution status
|
||||
status String @default("pending")
|
||||
// Allowed values: pending, extracting, reviewing_editorial, reviewing_methodology, completed, failed
|
||||
|
||||
// Review results (JSON)
|
||||
editorialReview Json? @map("editorial_review")
|
||||
methodologyReview Json? @map("methodology_review")
|
||||
overallScore Float? @map("overall_score")
|
||||
|
||||
// Execution information
|
||||
modelUsed String? @map("model_used")
|
||||
startedAt DateTime? @map("started_at")
|
||||
completedAt DateTime? @map("completed_at")
|
||||
durationSeconds Int? @map("duration_seconds")
|
||||
errorMessage String? @map("error_message") @db.Text
|
||||
|
||||
// Metadata
|
||||
createdAt DateTime @default(now()) @map("created_at")
|
||||
updatedAt DateTime @updatedAt @map("updated_at")
|
||||
|
||||
// Relations (rows are removed when the owning user is deleted: onDelete: Cascade)
|
||||
user User @relation(fields: [userId], references: [id], onDelete: Cascade)
|
||||
|
||||
@@index([userId])
|
||||
@@index([status])
|
||||
@@index([createdAt])
|
||||
@@map("review_tasks")
|
||||
@@schema("public")
|
||||
}
|
||||
|
||||
272
backend/src/common/document/ExtractionClient.ts
Normal file
272
backend/src/common/document/ExtractionClient.ts
Normal file
@@ -0,0 +1,272 @@
|
||||
import FormData from 'form-data';
|
||||
import axios from 'axios';
|
||||
|
||||
/**
 * Extraction Service Client
 * Calls the Python microservice to perform document extraction.
 */
|
||||
|
||||
// Base URL of the extraction service. Read from the EXTRACTION_SERVICE_URL
// environment variable; because `||` is used, an unset OR empty value falls
// back to http://localhost:8000.
const EXTRACTION_SERVICE_URL = process.env.EXTRACTION_SERVICE_URL || 'http://localhost:8000';
|
||||
|
||||
/**
 * Response body type used for the extraction service's `/api/extract*`
 * endpoints (it is the type argument passed to `axios.post` in this file).
 * The payload is not validated at runtime, so these fields describe the
 * expected shape only.
 */
export interface ExtractionResult {
|
||||
/** Success flag reported by the service. */
success: boolean;
|
||||
/** Extraction method, e.g. pymupdf/nougat/mammoth/direct. */
method: string; // pymupdf/nougat/mammoth/direct
|
||||
/** Extracted text content. */
text: string;
|
||||
/** Optional quality score; NOTE(review): range looks like 0-1 per the DB column comment, confirm with the service. */
quality?: number;
|
||||
/** Optional text encoding; presumably only set for plain-text inputs (TODO confirm). */
encoding?: string;
|
||||
/** Optional detected language. */
language?: string;
|
||||
/** File-level metadata; only `filename` is required. */
metadata: {
|
||||
filename: string;
|
||||
char_count?: number;
|
||||
line_count?: number;
|
||||
file_size?: number;
|
||||
page_count?: number;
|
||||
has_tables?: boolean;
|
||||
// Index signature: any additional metadata keys are accepted, untyped.
[key: string]: any;
|
||||
};
|
||||
/** Optional error description; presumably present only when `success` is false (TODO confirm). */
error?: string;
|
||||
}
|
||||
|
||||
class ExtractionClient {
|
||||
private baseUrl: string;
|
||||
|
||||
constructor(baseUrl: string = EXTRACTION_SERVICE_URL) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
/**
|
||||
* 健康检查
|
||||
*/
|
||||
async health(): Promise<{
|
||||
status: string;
|
||||
checks: any;
|
||||
timestamp: string;
|
||||
}> {
|
||||
try {
|
||||
const response = await axios.get(`${this.baseUrl}/api/health`);
|
||||
return response.data;
|
||||
} catch (error) {
|
||||
console.error('[ExtractionClient] Health check failed:', error);
|
||||
throw new Error('Extraction service is unavailable');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 通用文档提取接口
|
||||
* 自动检测文件类型并调用相应的提取方法
|
||||
*/
|
||||
async extractDocument(
|
||||
file: Buffer,
|
||||
filename: string
|
||||
): Promise<ExtractionResult> {
|
||||
try {
|
||||
const formData = new FormData();
|
||||
formData.append('file', file, filename);
|
||||
|
||||
const response = await axios.post<ExtractionResult>(
|
||||
`${this.baseUrl}/api/extract`,
|
||||
formData,
|
||||
{
|
||||
headers: {
|
||||
...formData.getHeaders(),
|
||||
},
|
||||
timeout: 120000, // 2分钟超时
|
||||
}
|
||||
);
|
||||
|
||||
return response.data;
|
||||
} catch (error) {
|
||||
console.error('[ExtractionClient] Extract failed:', error);
|
||||
|
||||
if (axios.isAxiosError(error) && error.response) {
|
||||
throw new Error(`Extraction failed: ${error.response.data.detail || error.message}`);
|
||||
}
|
||||
|
||||
throw new Error('Document extraction failed');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* PDF专用提取接口
|
||||
*/
|
||||
async extractPdf(
|
||||
file: Buffer,
|
||||
filename: string,
|
||||
method?: 'auto' | 'nougat' | 'pymupdf'
|
||||
): Promise<ExtractionResult> {
|
||||
try {
|
||||
const formData = new FormData();
|
||||
formData.append('file', file, filename);
|
||||
|
||||
if (method) {
|
||||
formData.append('method', method);
|
||||
}
|
||||
|
||||
const response = await axios.post<ExtractionResult>(
|
||||
`${this.baseUrl}/api/extract/pdf`,
|
||||
formData,
|
||||
{
|
||||
headers: {
|
||||
...formData.getHeaders(),
|
||||
},
|
||||
timeout: 180000, // 3分钟超时(Nougat较慢)
|
||||
}
|
||||
);
|
||||
|
||||
return response.data;
|
||||
} catch (error) {
|
||||
console.error('[ExtractionClient] PDF extract failed:', error);
|
||||
|
||||
if (axios.isAxiosError(error) && error.response) {
|
||||
throw new Error(`PDF extraction failed: ${error.response.data.detail || error.message}`);
|
||||
}
|
||||
|
||||
throw new Error('PDF extraction failed');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Docx专用提取接口
|
||||
*/
|
||||
async extractDocx(
|
||||
file: Buffer,
|
||||
filename: string
|
||||
): Promise<ExtractionResult> {
|
||||
try {
|
||||
const formData = new FormData();
|
||||
formData.append('file', file, filename);
|
||||
|
||||
const response = await axios.post<ExtractionResult>(
|
||||
`${this.baseUrl}/api/extract/docx`,
|
||||
formData,
|
||||
{
|
||||
headers: {
|
||||
...formData.getHeaders(),
|
||||
},
|
||||
timeout: 60000, // 1分钟超时
|
||||
}
|
||||
);
|
||||
|
||||
return response.data;
|
||||
} catch (error) {
|
||||
console.error('[ExtractionClient] Docx extract failed:', error);
|
||||
|
||||
if (axios.isAxiosError(error) && error.response) {
|
||||
throw new Error(`Docx extraction failed: ${error.response.data.detail || error.message}`);
|
||||
}
|
||||
|
||||
throw new Error('Docx extraction failed');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Txt专用提取接口
|
||||
*/
|
||||
async extractTxt(
|
||||
file: Buffer,
|
||||
filename: string
|
||||
): Promise<ExtractionResult> {
|
||||
try {
|
||||
const formData = new FormData();
|
||||
formData.append('file', file, filename);
|
||||
|
||||
const response = await axios.post<ExtractionResult>(
|
||||
`${this.baseUrl}/api/extract/txt`,
|
||||
formData,
|
||||
{
|
||||
headers: {
|
||||
...formData.getHeaders(),
|
||||
},
|
||||
timeout: 30000, // 30秒超时
|
||||
}
|
||||
);
|
||||
|
||||
return response.data;
|
||||
} catch (error) {
|
||||
console.error('[ExtractionClient] Txt extract failed:', error);
|
||||
|
||||
if (axios.isAxiosError(error) && error.response) {
|
||||
throw new Error(`Txt extraction failed: ${error.response.data.detail || error.message}`);
|
||||
}
|
||||
|
||||
throw new Error('Txt extraction failed');
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Detects the language of a PDF.
 *
 * Uploads the file as multipart form data to `/api/detect-language`
 * (30-second timeout) and returns the response body unchanged.
 *
 * @param file - Raw file contents.
 * @param filename - File name attached to the multipart `file` field.
 * @returns The service response (`language`, `chinese_ratio`,
 *   `chinese_chars`, `total_chars`).
 * @throws Error with the message "Language detection failed" on any failure.
 */
async detectLanguage(
  file: Buffer,
  filename: string
): Promise<{
  language: string;
  chinese_ratio: number;
  chinese_chars: number;
  total_chars: number;
}> {
  try {
    const payload = new FormData();
    payload.append('file', file, filename);

    const endpoint = `${this.baseUrl}/api/detect-language`;
    const requestOptions = {
      headers: { ...payload.getHeaders() },
      timeout: 30000,
    };

    const { data } = await axios.post(endpoint, payload, requestOptions);
    return data;
  } catch (error) {
    console.error('[ExtractionClient] Language detection failed:', error);
    throw new Error('Language detection failed');
  }
}
|
||||
|
||||
/**
 * Fetches the processing strategy for a PDF.
 *
 * Uploads the file as multipart form data to `/api/pdf-strategy`
 * (30-second timeout) and returns the response body unchanged.
 *
 * @param file - Raw file contents.
 * @param filename - File name attached to the multipart `file` field.
 * @returns The service response (`detected_language`, `recommended_method`,
 *   `reason`, `nougat_available`).
 * @throws Error with the message "Get PDF strategy failed" on any failure.
 */
async getPdfStrategy(
  file: Buffer,
  filename: string
): Promise<{
  detected_language: string;
  recommended_method: string;
  reason: string;
  nougat_available: boolean;
}> {
  try {
    const payload = new FormData();
    payload.append('file', file, filename);

    const endpoint = `${this.baseUrl}/api/pdf-strategy`;
    const requestOptions = {
      headers: { ...payload.getHeaders() },
      timeout: 30000,
    };

    const { data } = await axios.post(endpoint, payload, requestOptions);
    return data;
  } catch (error) {
    console.error('[ExtractionClient] Get PDF strategy failed:', error);
    throw new Error('Get PDF strategy failed');
  }
}
|
||||
}
|
||||
|
||||
// 导出单例
|
||||
export const extractionClient = new ExtractionClient();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import axios from 'axios';
|
||||
import { ILLMAdapter, Message, LLMOptions, LLMResponse, StreamChunk } from './types.js';
|
||||
import { config } from '../config/env.js';
|
||||
import { config } from '../../../config/env.js';
|
||||
|
||||
export class DeepSeekAdapter implements ILLMAdapter {
|
||||
modelName: string;
|
||||
@@ -35,7 +35,7 @@ export class DeepSeekAdapter implements ILLMAdapter {
|
||||
'Content-Type': 'application/json',
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
},
|
||||
timeout: 60000, // 60秒超时
|
||||
timeout: 180000, // 180秒超时(3分钟)- 稿件评估需要更长时间
|
||||
}
|
||||
);
|
||||
|
||||
@@ -29,7 +29,11 @@ export class LLMFactory {
|
||||
break;
|
||||
|
||||
case 'qwen3-72b':
|
||||
adapter = new QwenAdapter('qwen-max'); // Qwen3-72B对应的模型名
|
||||
adapter = new QwenAdapter('qwen-plus'); // Qwen3-72B对应的模型名
|
||||
break;
|
||||
|
||||
case 'qwen-long':
|
||||
adapter = new QwenAdapter('qwen-long'); // 1M上下文超长文本模型
|
||||
break;
|
||||
|
||||
case 'gemini-pro':
|
||||
@@ -63,7 +67,7 @@ export class LLMFactory {
|
||||
* @returns 是否支持
|
||||
*/
|
||||
static isSupported(modelType: string): boolean {
|
||||
return ['deepseek-v3', 'qwen3-72b', 'gemini-pro'].includes(modelType);
|
||||
return ['deepseek-v3', 'qwen3-72b', 'qwen-long', 'gemini-pro'].includes(modelType);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -71,7 +75,7 @@ export class LLMFactory {
|
||||
* @returns 支持的模型列表
|
||||
*/
|
||||
static getSupportedModels(): ModelType[] {
|
||||
return ['deepseek-v3', 'qwen3-72b', 'gemini-pro'];
|
||||
return ['deepseek-v3', 'qwen3-72b', 'qwen-long', 'gemini-pro'];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import axios from 'axios';
|
||||
import { ILLMAdapter, Message, LLMOptions, LLMResponse, StreamChunk } from './types.js';
|
||||
import { config } from '../config/env.js';
|
||||
import { config } from '../../../config/env.js';
|
||||
|
||||
export class QwenAdapter implements ILLMAdapter {
|
||||
modelName: string;
|
||||
@@ -39,7 +39,7 @@ export class QwenAdapter implements ILLMAdapter {
|
||||
'Content-Type': 'application/json',
|
||||
Authorization: `Bearer ${this.apiKey}`,
|
||||
},
|
||||
timeout: 60000,
|
||||
timeout: 180000, // 180秒超时(3分钟)- 稿件评估需要更长时间
|
||||
}
|
||||
);
|
||||
|
||||
@@ -74,6 +74,15 @@ export class QwenAdapter implements ILLMAdapter {
|
||||
onChunk?: (chunk: StreamChunk) => void
|
||||
): AsyncGenerator<StreamChunk, void, unknown> {
|
||||
try {
|
||||
// Qwen-Long需要更长的超时时间(全文模式可能传输~750K tokens)
|
||||
const timeout = this.modelName === 'qwen-long' ? 300000 : 60000; // 5分钟 vs 1分钟
|
||||
|
||||
console.log(`[QwenAdapter] 开始流式调用`, {
|
||||
model: this.modelName,
|
||||
timeout: `${timeout / 1000}秒`,
|
||||
messagesCount: messages.length,
|
||||
});
|
||||
|
||||
const response = await axios.post(
|
||||
this.baseURL,
|
||||
{
|
||||
@@ -96,7 +105,7 @@ export class QwenAdapter implements ILLMAdapter {
|
||||
'X-DashScope-SSE': 'enable',
|
||||
},
|
||||
responseType: 'stream',
|
||||
timeout: 60000,
|
||||
timeout: timeout,
|
||||
}
|
||||
);
|
||||
|
||||
@@ -51,7 +51,7 @@ export interface ILLMAdapter {
|
||||
}
|
||||
|
||||
// 支持的模型类型
|
||||
export type ModelType = 'deepseek-v3' | 'qwen3-72b' | 'gemini-pro';
|
||||
export type ModelType = 'deepseek-v3' | 'qwen3-72b' | 'qwen-long' | 'gemini-pro';
|
||||
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ import {
|
||||
DifyError,
|
||||
DifyErrorResponse,
|
||||
} from './types.js';
|
||||
import { config } from '../config/env.js';
|
||||
import { config } from '../../config/env.js';
|
||||
|
||||
/**
|
||||
* Dify API 客户端
|
||||
@@ -144,7 +144,7 @@ export class DifyClient {
|
||||
],
|
||||
segmentation: {
|
||||
separator: '\n',
|
||||
max_tokens: 500,
|
||||
max_tokens: 1500, // Phase 1优化:从500增加到1500 tokens
|
||||
},
|
||||
},
|
||||
},
|
||||
152
backend/src/common/utils/jsonParser.ts
Normal file
152
backend/src/common/utils/jsonParser.ts
Normal file
@@ -0,0 +1,152 @@
|
||||
/**
|
||||
* Phase 3: 批处理模式 - JSON解析工具
|
||||
*
|
||||
* AI的输出可能包含额外的文字说明,需要提取JSON块并解析
|
||||
*/
|
||||
|
||||
export interface ParseResult<T = any> {
|
||||
success: boolean;
|
||||
data?: T;
|
||||
error?: string;
|
||||
rawOutput: string;
|
||||
}
|
||||
|
||||
/**
 * Returns the index of the bracket that closes the JSON value opened at
 * `start`, or -1 when the text ends first. Brackets inside double-quoted
 * strings (including escaped quotes) are ignored.
 */
function findJsonEnd(text: string, start: number): number {
  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = start; i < text.length; i++) {
    const ch = text[i];

    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }

    if (ch === '"') {
      inString = true;
    } else if (ch === '{' || ch === '[') {
      depth++;
    } else if (ch === '}' || ch === ']') {
      depth--;
      if (depth === 0) {
        return i;
      }
    }
  }

  return -1;
}

/** True when `candidate` is accepted by `JSON.parse`. */
function isValidJson(candidate: string): boolean {
  try {
    JSON.parse(candidate);
    return true;
  } catch {
    return false;
  }
}

/**
 * Extracts the first JSON object or array embedded in AI output.
 *
 * Supported shapes:
 *   1. Bare JSON:       { "key": "value" }
 *   2. With a preamble: some text\n{ "key": "value" }
 *   3. With a suffix:   { "key": "value" }\n\nsome text
 *   4. Fenced block:    ```json\n{ "key": "value" }\n```
 *
 * Strategy:
 *   - Walk every `{` / `[` in order, take the balanced span that starts
 *     there and return the first span that actually parses. Unlike a greedy
 *     "first opener to last closer" match, this survives brackets or braces
 *     in the surrounding prose.
 *   - If nothing parses, fall back to the legacy greedy match and then to the
 *     content of the first fenced block, so callers still receive the same
 *     text (and therefore the same downstream parse error) as before.
 *
 * Worst case is quadratic in the number of openers; inputs are expected to
 * be single model responses.
 *
 * @param text - Raw model output.
 * @returns The extracted JSON text, or null when no candidate is found.
 */
export function extractJSON(text: string): string | null {
  // Pass 1: first balanced span that is valid JSON.
  for (let start = 0; start < text.length; start++) {
    const opener = text[start];
    if (opener !== '{' && opener !== '[') {
      continue;
    }

    const end = findJsonEnd(text, start);
    if (end === -1) {
      continue;
    }

    const candidate = text.slice(start, end + 1);
    if (isValidJson(candidate)) {
      return candidate;
    }
  }

  // Pass 2 (legacy behaviour): greedy {...} or [...] match.
  const greedyMatch = text.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
  if (greedyMatch) {
    return greedyMatch[1];
  }

  // Pass 3 (legacy behaviour): content of the first fenced code block.
  const fencedMatch = text.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
  if (fencedMatch) {
    return fencedMatch[1].trim();
  }

  return null;
}
|
||||
|
||||
/**
 * Parses JSON out of a piece of text.
 *
 * The JSON fragment is first located with `extractJSON`, then parsed. When
 * `expectedFields` is given, every listed field that is absent from the
 * parsed value is logged and filled with the placeholder '未提取到'.
 *
 * @param jsonString - Text that contains JSON (possibly with extra prose).
 * @param expectedFields - Optional list of field names to check for.
 * @returns `success: true` with the parsed `data`, or `success: false` with
 *   an `error` message; `rawOutput` always echoes the input text.
 */
export function parseJSON<T = any>(
  jsonString: string,
  expectedFields?: string[]
): ParseResult<T> {
  const rawOutput = jsonString;

  try {
    const jsonText = extractJSON(jsonString);
    if (!jsonText) {
      return { success: false, error: '未找到JSON格式的数据', rawOutput };
    }

    const data = JSON.parse(jsonText) as T;

    if (expectedFields && Array.isArray(expectedFields)) {
      const record = data as any;
      const missingFields = expectedFields.filter((field) => !(field in record));

      if (missingFields.length > 0) {
        console.warn(`[JsonParser] 缺少字段: ${missingFields.join(', ')}`);
        // Fill every missing field with the placeholder value.
        missingFields.forEach((field) => {
          record[field] = '未提取到';
        });
      }
    }

    return { success: true, data, rawOutput };
  } catch (error: any) {
    return { success: false, error: error.message, rawOutput };
  }
}
|
||||
|
||||
/**
 * Checks parsed data against a template's field definitions.
 *
 * Validation is deliberately lenient: an empty field (undefined, null or '')
 * is only logged as a warning, never reported as an error. Fields declared
 * as `number` whose value is not already a number are coerced in place with
 * `Number(...)` when the conversion succeeds.
 *
 * Empty and whitespace-only values are never coerced: `Number(null)` and
 * `Number('  ')` both evaluate to 0, which would turn "no value" into a real
 * measurement of zero.
 *
 * @param data - Parsed data; mutated in place when numeric fields are coerced.
 * @param templateFields - Field definitions (`key` and declared `type`).
 * @returns `valid: false` only when `data` is not an object; `errors` lists
 *   the reasons.
 */
export function validateTemplateData(
  data: any,
  templateFields: Array<{ key: string; type: string }>
): { valid: boolean; errors: string[] } {
  const errors: string[] = [];

  if (!data || typeof data !== 'object') {
    errors.push('数据不是有效的对象');
    return { valid: false, errors };
  }

  for (const field of templateFields) {
    const value = data[field.key];

    // Missing values are a warning, not an error.
    if (value === undefined || value === null || value === '') {
      console.warn(`[JsonParser] 字段 ${field.key} 为空`);
      continue; // nothing to coerce
    }

    // Lenient type check: try to convert non-number values for number fields.
    const isBlankString = typeof value === 'string' && value.trim() === '';
    if (field.type === 'number' && typeof value !== 'number' && !isBlankString) {
      const num = Number(value);
      if (!Number.isNaN(num)) {
        data[field.key] = num;
      }
    }
  }

  return { valid: errors.length === 0, errors };
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -27,9 +27,16 @@ export const config = {
|
||||
|
||||
// LLM API配置
|
||||
deepseekApiKey: process.env.DEEPSEEK_API_KEY || '',
|
||||
deepseekBaseUrl: process.env.DEEPSEEK_BASE_URL || 'https://api.deepseek.com',
|
||||
|
||||
dashscopeApiKey: process.env.DASHSCOPE_API_KEY || '', // 用于Qwen模型
|
||||
geminiApiKey: process.env.GEMINI_API_KEY || '',
|
||||
|
||||
// CloseAI配置(代理OpenAI和Claude)
|
||||
closeaiApiKey: process.env.CLOSEAI_API_KEY || '',
|
||||
closeaiOpenaiBaseUrl: process.env.CLOSEAI_OPENAI_BASE_URL || 'https://api.openai-proxy.org/v1',
|
||||
closeaiClaudeBaseUrl: process.env.CLOSEAI_CLAUDE_BASE_URL || 'https://api.openai-proxy.org/anthropic',
|
||||
|
||||
// Dify配置
|
||||
difyApiKey: process.env.DIFY_API_KEY || '',
|
||||
difyApiUrl: process.env.DIFY_API_URL || 'http://localhost/v1',
|
||||
|
||||
@@ -1,304 +0,0 @@
|
||||
import { FastifyRequest, FastifyReply } from 'fastify';
|
||||
import { ModelType } from '../adapters/types.js';
|
||||
import { LLMFactory } from '../adapters/LLMFactory.js';
|
||||
import * as knowledgeBaseService from '../services/knowledgeBaseService.js';
|
||||
import { prisma } from '../config/database.js';
|
||||
|
||||
interface SendChatMessageBody {
|
||||
content: string;
|
||||
modelType: ModelType;
|
||||
knowledgeBaseIds?: string[];
|
||||
conversationId?: string; // 可选:续接已有对话
|
||||
}
|
||||
|
||||
/**
|
||||
* 通用聊天Controller
|
||||
* 无需项目和智能体,纯大模型对话
|
||||
*/
|
||||
export class ChatController {
|
||||
/**
|
||||
* 发送消息(流式输出)
|
||||
*/
|
||||
async sendMessageStream(
|
||||
request: FastifyRequest<{ Body: SendChatMessageBody }>,
|
||||
reply: FastifyReply
|
||||
) {
|
||||
try {
|
||||
// TODO: 从JWT token获取userId
|
||||
const userId = 'user-mock-001';
|
||||
|
||||
const { content, modelType, knowledgeBaseIds, conversationId } = request.body;
|
||||
|
||||
console.log('💬 [ChatController] 收到通用对话请求', {
|
||||
content,
|
||||
modelType,
|
||||
knowledgeBaseIds: knowledgeBaseIds || [],
|
||||
conversationId,
|
||||
});
|
||||
|
||||
// 验证modelType
|
||||
if (modelType !== 'deepseek-v3' && modelType !== 'qwen3-72b' && modelType !== 'gemini-pro') {
|
||||
reply.code(400).send({
|
||||
success: false,
|
||||
message: `不支持的模型类型: ${modelType}`,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// 获取或创建对话记录
|
||||
let conversation;
|
||||
if (conversationId) {
|
||||
// 验证对话是否存在且属于当前用户
|
||||
conversation = await prisma.generalConversation.findFirst({
|
||||
where: {
|
||||
id: conversationId,
|
||||
userId,
|
||||
deletedAt: null,
|
||||
},
|
||||
});
|
||||
|
||||
if (!conversation) {
|
||||
reply.code(404).send({
|
||||
success: false,
|
||||
message: '对话不存在',
|
||||
});
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// 创建新对话
|
||||
conversation = await prisma.generalConversation.create({
|
||||
data: {
|
||||
userId,
|
||||
title: content.substring(0, 50), // 用第一条消息的前50字作为标题
|
||||
modelName: modelType,
|
||||
},
|
||||
});
|
||||
console.log('✅ [ChatController] 创建新对话', { conversationId: conversation.id });
|
||||
}
|
||||
|
||||
// 检索知识库上下文
|
||||
let knowledgeBaseContext = '';
|
||||
if (knowledgeBaseIds && knowledgeBaseIds.length > 0) {
|
||||
console.log('📚 [ChatController] 开始检索知识库');
|
||||
const knowledgeResults: string[] = [];
|
||||
|
||||
for (const kbId of knowledgeBaseIds) {
|
||||
try {
|
||||
const searchResult = await knowledgeBaseService.searchKnowledgeBase(
|
||||
userId,
|
||||
kbId,
|
||||
content,
|
||||
3
|
||||
);
|
||||
|
||||
if (searchResult.records && searchResult.records.length > 0) {
|
||||
const kbInfo = await prisma.knowledgeBase.findUnique({
|
||||
where: { id: kbId },
|
||||
select: { name: true },
|
||||
});
|
||||
|
||||
knowledgeResults.push(
|
||||
`【知识库:${kbInfo?.name || '未命名'}】\n` +
|
||||
searchResult.records
|
||||
.map((record: any, index: number) => {
|
||||
const score = (record.score * 100).toFixed(1);
|
||||
return `${index + 1}. [相关度${score}%] ${record.segment.content}`;
|
||||
})
|
||||
.join('\n\n')
|
||||
);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`❌ [ChatController] 检索知识库失败 ${kbId}:`, error);
|
||||
}
|
||||
}
|
||||
|
||||
if (knowledgeResults.length > 0) {
|
||||
knowledgeBaseContext = knowledgeResults.join('\n\n---\n\n');
|
||||
console.log(`💾 [ChatController] 知识库上下文: ${knowledgeBaseContext.length} 字符`);
|
||||
}
|
||||
}
|
||||
|
||||
// 获取历史消息(最近20条)
|
||||
const historyMessages = await prisma.generalMessage.findMany({
|
||||
where: {
|
||||
conversationId: conversation.id,
|
||||
},
|
||||
orderBy: {
|
||||
createdAt: 'desc',
|
||||
},
|
||||
take: 20,
|
||||
});
|
||||
historyMessages.reverse();
|
||||
console.log(`📜 [ChatController] 历史消息数: ${historyMessages.length}`);
|
||||
|
||||
// 组装消息上下文
|
||||
const messages: any[] = [
|
||||
{
|
||||
role: 'system',
|
||||
content: '你是一个专业、友好的AI助手。当用户提供参考资料时,请优先基于参考资料回答。',
|
||||
},
|
||||
];
|
||||
|
||||
// 添加历史消息
|
||||
for (const msg of historyMessages) {
|
||||
messages.push({
|
||||
role: msg.role,
|
||||
content: msg.content,
|
||||
});
|
||||
}
|
||||
|
||||
// 添加当前用户消息
|
||||
let userContent = content;
|
||||
if (knowledgeBaseContext) {
|
||||
userContent = `${content}\n\n## 参考资料(来自知识库)\n${knowledgeBaseContext}`;
|
||||
}
|
||||
messages.push({
|
||||
role: 'user',
|
||||
content: userContent,
|
||||
});
|
||||
|
||||
// 设置SSE响应头
|
||||
reply.raw.writeHead(200, {
|
||||
'Content-Type': 'text/event-stream',
|
||||
'Cache-Control': 'no-cache',
|
||||
Connection: 'keep-alive',
|
||||
'Access-Control-Allow-Origin': '*',
|
||||
});
|
||||
|
||||
// 保存用户消息
|
||||
await prisma.generalMessage.create({
|
||||
data: {
|
||||
conversationId: conversation.id,
|
||||
role: 'user',
|
||||
content,
|
||||
metadata: {
|
||||
knowledgeBaseIds,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
// 流式输出
|
||||
const adapter = LLMFactory.getAdapter(modelType);
|
||||
let fullContent = '';
|
||||
let usage: any = null;
|
||||
|
||||
for await (const chunk of adapter.chatStream(messages, {
|
||||
temperature: 0.7,
|
||||
maxTokens: 2000,
|
||||
})) {
|
||||
fullContent += chunk.content;
|
||||
|
||||
if (chunk.usage) {
|
||||
usage = chunk.usage;
|
||||
}
|
||||
|
||||
// 发送SSE数据
|
||||
reply.raw.write(`data: ${JSON.stringify(chunk)}\n\n`);
|
||||
}
|
||||
|
||||
// 保存助手消息
|
||||
await prisma.generalMessage.create({
|
||||
data: {
|
||||
conversationId: conversation.id,
|
||||
role: 'assistant',
|
||||
content: fullContent,
|
||||
model: modelType,
|
||||
tokens: usage?.totalTokens,
|
||||
metadata: {
|
||||
usage,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
// 更新对话
|
||||
await prisma.generalConversation.update({
|
||||
where: { id: conversation.id },
|
||||
data: {
|
||||
updatedAt: new Date(),
|
||||
},
|
||||
});
|
||||
|
||||
// 发送完成信号
|
||||
reply.raw.write(`data: [DONE]\n\n`);
|
||||
reply.raw.end();
|
||||
|
||||
console.log('✅ [ChatController] 对话完成');
|
||||
} catch (error: any) {
|
||||
console.error('❌ [ChatController] 错误:', error);
|
||||
reply.code(500).send({
|
||||
success: false,
|
||||
message: error.message || '服务器错误',
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取对话列表
|
||||
*/
|
||||
async getConversations(
|
||||
request: FastifyRequest,
|
||||
reply: FastifyReply
|
||||
) {
|
||||
try {
|
||||
const userId = 'user-mock-001';
|
||||
|
||||
const conversations = await prisma.generalConversation.findMany({
|
||||
where: {
|
||||
userId,
|
||||
deletedAt: null,
|
||||
},
|
||||
orderBy: {
|
||||
updatedAt: 'desc',
|
||||
},
|
||||
take: 50,
|
||||
});
|
||||
|
||||
reply.send({
|
||||
success: true,
|
||||
data: conversations,
|
||||
});
|
||||
} catch (error: any) {
|
||||
reply.code(500).send({
|
||||
success: false,
|
||||
message: error.message || '获取对话列表失败',
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 删除对话
|
||||
*/
|
||||
async deleteConversation(
|
||||
request: FastifyRequest<{ Params: { id: string } }>,
|
||||
reply: FastifyReply
|
||||
) {
|
||||
try {
|
||||
const userId = 'user-mock-001';
|
||||
const { id } = request.params;
|
||||
|
||||
await prisma.generalConversation.update({
|
||||
where: {
|
||||
id,
|
||||
userId,
|
||||
},
|
||||
data: {
|
||||
deletedAt: new Date(),
|
||||
},
|
||||
});
|
||||
|
||||
reply.send({
|
||||
success: true,
|
||||
message: '删除成功',
|
||||
});
|
||||
} catch (error: any) {
|
||||
reply.code(500).send({
|
||||
success: false,
|
||||
message: error.message || '删除失败',
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export const chatController = new ChatController();
|
||||
|
||||
|
||||
@@ -3,11 +3,13 @@ import cors from '@fastify/cors';
|
||||
import multipart from '@fastify/multipart';
|
||||
import { config, validateEnv } from './config/env.js';
|
||||
import { testDatabaseConnection, prisma } from './config/database.js';
|
||||
import { projectRoutes } from './routes/projects.js';
|
||||
import { agentRoutes } from './routes/agents.js';
|
||||
import { conversationRoutes } from './routes/conversations.js';
|
||||
import knowledgeBaseRoutes from './routes/knowledgeBases.js';
|
||||
import { chatRoutes } from './routes/chatRoutes.js';
|
||||
import { projectRoutes } from './legacy/routes/projects.js';
|
||||
import { agentRoutes } from './legacy/routes/agents.js';
|
||||
import { conversationRoutes } from './legacy/routes/conversations.js';
|
||||
import knowledgeBaseRoutes from './legacy/routes/knowledgeBases.js';
|
||||
import { chatRoutes } from './legacy/routes/chatRoutes.js';
|
||||
import { batchRoutes } from './legacy/routes/batchRoutes.js';
|
||||
import reviewRoutes from './legacy/routes/reviewRoutes.js';
|
||||
|
||||
|
||||
// 全局处理BigInt序列化
|
||||
@@ -93,6 +95,12 @@ await fastify.register(knowledgeBaseRoutes, { prefix: '/api/v1' });
|
||||
// 注册通用对话路由
|
||||
await fastify.register(chatRoutes, { prefix: '/api/v1' });
|
||||
|
||||
// Phase 3: 注册批处理路由
|
||||
await fastify.register(batchRoutes, { prefix: '/api/v1' });
|
||||
|
||||
// 注册稿件审查路由
|
||||
await fastify.register(reviewRoutes, { prefix: '/api/v1' });
|
||||
|
||||
// 启动服务器
|
||||
const start = async () => {
|
||||
try {
|
||||
|
||||
428
backend/src/legacy/controllers/batchController.ts
Normal file
428
backend/src/legacy/controllers/batchController.ts
Normal file
@@ -0,0 +1,428 @@
|
||||
/**
|
||||
* Phase 3: 批处理模式 - 批处理控制器
|
||||
*
|
||||
* API路由:
|
||||
* - POST /api/v1/batch/execute - 执行批处理任务
|
||||
* - GET /api/v1/batch/tasks/:taskId - 获取任务状态
|
||||
* - GET /api/v1/batch/tasks/:taskId/results - 获取任务结果
|
||||
* - POST /api/v1/batch/tasks/:taskId/retry-failed - 重试失败项
|
||||
*/
|
||||
|
||||
import { FastifyRequest, FastifyReply } from 'fastify';
|
||||
import { executeBatchTask, retryFailedDocuments, BatchProgress } from '../services/batchService.js';
|
||||
import { prisma } from '../../config/database.js';
|
||||
import { ModelType } from '../../common/llm/adapters/types.js';
|
||||
|
||||
// ==================== 类型定义 ====================
|
||||
|
||||
interface ExecuteBatchBody {
|
||||
kb_id: string;
|
||||
document_ids: string[];
|
||||
template_type: 'preset' | 'custom';
|
||||
template_id?: string;
|
||||
custom_prompt?: string;
|
||||
model_type: ModelType;
|
||||
task_name?: string;
|
||||
}
|
||||
|
||||
interface TaskIdParams {
|
||||
taskId: string;
|
||||
}
|
||||
|
||||
// ==================== API处理器 ====================
|
||||
|
||||
/**
 * POST /api/v1/batch/execute
 * Starts a batch-processing task.
 *
 * Validates the request, creates the task record, kicks off processing in
 * the background and replies immediately with the task id. Progress,
 * completion and failure are emitted on the server's `io` instance (when
 * present) to the room named after the user id.
 *
 * Responses:
 *   - 400 when required parameters are missing or invalid, the number of
 *     distinct documents is outside 3..50, or some documents are not found
 *     in the knowledge base.
 *   - 404 when the knowledge base does not exist.
 *   - 500 on unexpected errors.
 */
export async function executeBatch(
  request: FastifyRequest<{ Body: ExecuteBatchBody }>,
  reply: FastifyReply
) {
  try {
    // TODO: take userId from the JWT
    const userId = 'user-mock-001';

    const {
      kb_id,
      document_ids,
      template_type,
      template_id,
      custom_prompt,
      model_type,
      task_name,
    } = request.body;

    console.log('📦 [BatchController] 收到批处理请求', {
      userId,
      kbId: kb_id,
      // document_ids has not been validated yet; do not dereference blindly,
      // otherwise a missing field becomes a 500 instead of a 400.
      documentCount: Array.isArray(document_ids) ? document_ids.length : 0,
      templateType: template_type,
      modelType: model_type,
    });

    // Validate parameters
    if (!kb_id || !Array.isArray(document_ids) || document_ids.length === 0) {
      return reply.code(400).send({
        success: false,
        message: '缺少必要参数:kb_id 或 document_ids',
      });
    }

    // Work on distinct ids: a repeated id would otherwise make the existence
    // check below fail (the database returns each document once).
    const uniqueDocumentIds = Array.from(new Set(document_ids));

    if (uniqueDocumentIds.length < 3) {
      return reply.code(400).send({
        success: false,
        message: '文献数量不能少于3篇',
      });
    }

    if (uniqueDocumentIds.length > 50) {
      return reply.code(400).send({
        success: false,
        message: '文献数量不能超过50篇',
      });
    }

    if (template_type === 'preset' && !template_id) {
      return reply.code(400).send({
        success: false,
        message: '预设模板类型需要提供 template_id',
      });
    }

    if (template_type === 'custom' && !custom_prompt) {
      return reply.code(400).send({
        success: false,
        message: '自定义模板需要提供 custom_prompt',
      });
    }

    // Validate the model type
    const validModels: ModelType[] = ['deepseek-v3', 'qwen3-72b', 'qwen-long'];
    if (!validModels.includes(model_type)) {
      return reply.code(400).send({
        success: false,
        message: `不支持的模型类型: ${model_type}`,
      });
    }

    // The knowledge base must exist
    const kb = await prisma.knowledgeBase.findUnique({
      where: { id: kb_id },
    });

    if (!kb) {
      return reply.code(404).send({
        success: false,
        message: `知识库不存在: ${kb_id}`,
      });
    }

    // Every requested document must exist in that knowledge base
    const documents = await prisma.document.findMany({
      where: {
        id: { in: uniqueDocumentIds },
        kbId: kb_id,
      },
    });

    if (documents.length !== uniqueDocumentIds.length) {
      return reply.code(400).send({
        success: false,
        message: `部分文档不存在或不属于该知识库`,
      });
    }

    // WebSocket instance used for progress push (may be absent)
    const io = (request.server as any).io;

    // Create the task record first so the id can be returned immediately
    const taskPreview = await prisma.batchTask.create({
      data: {
        userId,
        kbId: kb_id,
        name: task_name || `批处理任务_${new Date().toLocaleString('zh-CN')}`,
        templateType: template_type,
        templateId: template_id || null,
        prompt: custom_prompt || template_id || '',
        status: 'processing',
        totalDocuments: uniqueDocumentIds.length,
        modelType: model_type,
        concurrency: 3,
        startedAt: new Date(),
      },
    });

    const taskId = taskPreview.id;
    console.log(`✅ [BatchController] 创建任务: ${taskId}`);

    // Run the batch in the background (intentionally not awaited)
    void executeBatchTask({
      userId,
      kbId: kb_id,
      documentIds: uniqueDocumentIds,
      templateType: template_type,
      templateId: template_id,
      customPrompt: custom_prompt,
      modelType: model_type,
      taskName: task_name,
      existingTaskId: taskId, // reuse the task created above
      onProgress: (progress: BatchProgress) => {
        // Push progress over WebSocket
        if (io) {
          io.to(userId).emit('batch-progress', progress);
        }
      },
    })
      .then((result) => {
        console.log(`🎉 [BatchController] 批处理任务完成: ${result.taskId}`);
        // Push the completion event
        if (io) {
          io.to(userId).emit('batch-completed', {
            task_id: result.taskId,
            status: result.status,
          });
        }
      })
      .catch((error) => {
        console.error(`❌ [BatchController] 批处理任务失败:`, error);
        // Push the failure event with the real task id so clients can
        // correlate it with the task they started.
        if (io) {
          io.to(userId).emit('batch-failed', {
            task_id: taskId,
            error: error.message,
          });
        }
      });

    // Reply right away with the task id; the task keeps running in the background
    return reply.send({
      success: true,
      message: '批处理任务已开始',
      data: {
        task_id: taskId,
        status: 'processing',
        websocket_event: 'batch-progress',
      },
    });
  } catch (error: any) {
    console.error('❌ [BatchController] 执行批处理失败:', error);
    reply.code(500).send({
      success: false,
      message: error.message || '执行批处理任务失败',
    });
  }
}
|
||||
|
||||
/**
 * GET /api/v1/batch/tasks/:taskId
 * Returns the status summary of one batch task.
 *
 * Replies 404 when the task does not exist and 500 on unexpected errors.
 * Field names in the response are snake_case versions of the stored columns.
 */
export async function getTask(
  request: FastifyRequest<{ Params: TaskIdParams }>,
  reply: FastifyReply
) {
  try {
    const { taskId } = request.params;

    const record = await prisma.batchTask.findUnique({
      where: { id: taskId },
      select: {
        id: true,
        name: true,
        status: true,
        totalDocuments: true,
        completedCount: true,
        failedCount: true,
        modelType: true,
        startedAt: true,
        completedAt: true,
        durationSeconds: true,
        createdAt: true,
      },
    });

    if (!record) {
      return reply.code(404).send({
        success: false,
        message: `任务不存在: ${taskId}`,
      });
    }

    // Map the stored columns onto the public response shape.
    const summary = {
      id: record.id,
      name: record.name,
      status: record.status,
      total_documents: record.totalDocuments,
      completed_count: record.completedCount,
      failed_count: record.failedCount,
      model_type: record.modelType,
      started_at: record.startedAt,
      completed_at: record.completedAt,
      duration_seconds: record.durationSeconds,
      created_at: record.createdAt,
    };

    reply.send({ success: true, data: summary });
  } catch (error: any) {
    console.error('❌ [BatchController] 获取任务失败:', error);
    reply.code(500).send({
      success: false,
      message: error.message || '获取任务失败',
    });
  }
}
|
||||
|
||||
/**
 * GET /api/v1/batch/tasks/:taskId/results
 * Returns a batch task together with its per-document results.
 *
 * Results are loaded oldest first and numbered from 1 in that order.
 * Replies 404 when the task does not exist and 500 on unexpected errors.
 */
export async function getTaskResults(
  request: FastifyRequest<{ Params: TaskIdParams }>,
  reply: FastifyReply
) {
  try {
    const { taskId } = request.params;

    // Load the task with its results and each result's document metadata
    const record = await prisma.batchTask.findUnique({
      where: { id: taskId },
      include: {
        results: {
          include: {
            document: {
              select: {
                filename: true,
                tokensCount: true,
              },
            },
          },
          orderBy: {
            createdAt: 'asc',
          },
        },
      },
    });

    if (!record) {
      return reply.code(404).send({
        success: false,
        message: `任务不存在: ${taskId}`,
      });
    }

    // Shape each stored result for the response
    const results = record.results.map((item, position) => {
      return {
        id: item.id,
        index: position + 1,
        document_id: item.documentId,
        document_name: item.document.filename,
        status: item.status,
        data: item.data,
        raw_output: item.rawOutput,
        error_message: item.errorMessage,
        processing_time_ms: item.processingTimeMs,
        tokens_used: item.tokensUsed,
        created_at: item.createdAt,
      };
    });

    const taskSummary = {
      id: record.id,
      name: record.name,
      status: record.status,
      template_type: record.templateType,
      template_id: record.templateId,
      total_documents: record.totalDocuments,
      completed_count: record.completedCount,
      failed_count: record.failedCount,
      duration_seconds: record.durationSeconds,
      created_at: record.createdAt,
      completed_at: record.completedAt,
    };

    reply.send({
      success: true,
      data: {
        task: taskSummary,
        results,
      },
    });
  } catch (error: any) {
    console.error('❌ [BatchController] 获取任务结果失败:', error);
    reply.code(500).send({
      success: false,
      message: error.message || '获取任务结果失败',
    });
  }
}
|
||||
|
||||
/**
 * POST /api/v1/batch/tasks/:taskId/retry-failed
 * Retries the failed documents of a batch task.
 *
 * The retry runs in the background; the handler replies as soon as it has
 * been started. Progress is emitted as `batch-progress` on the server's `io`
 * instance (when present) to the room named after the user id.
 *
 * Replies 404 when the task does not exist (same message as the other task
 * endpoints) instead of reporting success for an unknown id, and 500 on
 * unexpected errors.
 */
export async function retryFailed(
  request: FastifyRequest<{ Params: TaskIdParams }>,
  reply: FastifyReply
) {
  try {
    const { taskId } = request.params;
    const userId = 'user-mock-001'; // TODO: take from the JWT

    // Fail fast for unknown ids: errors raised by the background retry are
    // only logged, so the client would otherwise always see success.
    const task = await prisma.batchTask.findUnique({
      where: { id: taskId },
      select: { id: true },
    });

    if (!task) {
      return reply.code(404).send({
        success: false,
        message: `任务不存在: ${taskId}`,
      });
    }

    // WebSocket instance (may be absent)
    const io = (request.server as any).io;

    // Run the retry in the background (intentionally not awaited)
    void retryFailedDocuments(taskId, (progress: BatchProgress) => {
      if (io) {
        io.to(userId).emit('batch-progress', progress);
      }
    })
      .then((result) => {
        console.log(`✅ [BatchController] 重试完成: ${result.retriedCount}篇`);
      })
      .catch((error) => {
        console.error(`❌ [BatchController] 重试失败:`, error);
      });

    return reply.send({
      success: true,
      message: '已开始重试失败的文档',
    });
  } catch (error: any) {
    console.error('❌ [BatchController] 重试失败:', error);
    reply.code(500).send({
      success: false,
      message: error.message || '重试失败',
    });
  }
}
|
||||
|
||||
/**
 * GET /api/v1/batch/templates
 * Lists all preset templates.
 *
 * The template module is loaded lazily on each call. Every template is
 * reduced to its id, name, description and output fields. Replies 500 on
 * unexpected errors.
 */
export async function getTemplates(
  request: FastifyRequest,
  reply: FastifyReply
) {
  try {
    const templateModule = await import('../templates/clinicalResearch.js');
    const allTemplates = templateModule.getAllTemplates();

    // Expose only the public fields of each template
    const summaries = allTemplates.map((template) => {
      return {
        id: template.id,
        name: template.name,
        description: template.description,
        output_fields: template.outputFields,
      };
    });

    reply.send({ success: true, data: summaries });
  } catch (error: any) {
    console.error('❌ [BatchController] 获取模板失败:', error);
    reply.code(500).send({
      success: false,
      message: error.message || '获取模板失败',
    });
  }
}
|
||||
|
||||
566
backend/src/legacy/controllers/chatController.ts
Normal file
566
backend/src/legacy/controllers/chatController.ts
Normal file
@@ -0,0 +1,566 @@
|
||||
import { FastifyRequest, FastifyReply } from 'fastify';
|
||||
import { ModelType } from '../../common/llm/adapters/types.js';
|
||||
import { LLMFactory } from '../../common/llm/adapters/LLMFactory.js';
|
||||
import * as knowledgeBaseService from '../services/knowledgeBaseService.js';
|
||||
import { prisma } from '../../config/database.js';
|
||||
|
||||
/**
|
||||
* 引用信息接口
|
||||
*/
|
||||
interface Citation {
|
||||
id: number;
|
||||
fileName: string;
|
||||
position: number;
|
||||
score: number;
|
||||
content: string;
|
||||
}
|
||||
|
||||
/**
 * Builds a short preview of a text fragment (used as citation context).
 *
 * Whitespace runs are collapsed to single spaces and the text is trimmed.
 * Text that fits in `maxLength` is returned as is. Longer text is cut at
 * `maxLength`; if a sentence-ending mark occurs in the second half of the
 * cut, the preview ends right after the last such mark, otherwise '...' is
 * appended to the cut.
 *
 * Both full-width (。!?) and ASCII (. ! ?) sentence enders are recognised;
 * previously the ASCII `!` and `?` were listed twice and their full-width
 * forms were never matched.
 *
 * @param text - Source text; empty input yields ''.
 * @param maxLength - Maximum preview length in UTF-16 code units (default 100).
 * @returns The preview string.
 */
function extractContextPreview(text: string, maxLength: number = 100): string {
  if (!text) return '';

  const cleaned = text.replace(/\s+/g, ' ').trim();
  if (cleaned.length <= maxLength) {
    return cleaned;
  }

  const truncated = cleaned.substring(0, maxLength);

  const sentenceEnders = ['。', '!', '?', '.', '!', '?'];
  const lastPunctuation = Math.max(
    ...sentenceEnders.map((mark) => truncated.lastIndexOf(mark))
  );

  // Only end on a sentence boundary when it keeps more than half of the
  // allowed length; otherwise the preview would be too short to be useful.
  if (lastPunctuation > maxLength * 0.5) {
    return truncated.substring(0, lastPunctuation + 1);
  }

  return truncated + '...';
}
|
||||
|
||||
/**
 * Renders the citation list appended to an answer.
 *
 * Returns '' for an empty list. Otherwise the output starts with a
 * horizontal rule and a heading, followed by one entry per citation: an
 * HTML span whose id is `citation-detail-<id>` (a jump target for the
 * citation number), the file name, the segment position, the relevance as a
 * whole-number percentage, and a quoted preview of up to 100 characters.
 *
 * @param citations - Citations to list, in display order.
 * @returns The formatted markdown/HTML block.
 */
function formatCitations(citations: Citation[]): string {
  if (citations.length === 0) return '';

  const header = '\n\n---\n\n📚 **参考文献**\n\n';

  const entries = citations.map((cite) => {
    const scorePercent = (cite.score * 100).toFixed(0);
    const preview = extractContextPreview(cite.content, 100);

    // The span id lets the UI jump to this entry from the inline citation number.
    const titleLine = `<span id="citation-detail-${cite.id}">[${cite.id}]</span> 📄 **${cite.fileName}** - 第${cite.position}段 (相关度${scorePercent}%)\n`;
    const previewLine = ` "${preview}"\n\n`;

    return titleLine + previewLine;
  });

  return header + entries.join('');
}
|
||||
|
||||
/**
 * Request body accepted by `ChatController.sendMessageStream`.
 */
interface SendChatMessageBody {
  /** The user's message text. */
  content: string;
  /** Model used to answer this message. */
  modelType: ModelType;
  /** Knowledge bases to search for reference context. */
  knowledgeBaseIds?: string[];
  /** Phase 2, per-document reading mode: restrict search results to these documents. */
  documentIds?: string[];
  /** Phase 2, full-text reading mode: documents whose full text is sent to the model. */
  fullTextDocumentIds?: string[];
  /** Optional: continue an existing conversation instead of creating one. */
  conversationId?: string;
}
|
||||
|
||||
/**
|
||||
* 通用聊天Controller
|
||||
* 无需项目和智能体,纯大模型对话
|
||||
*/
|
||||
export class ChatController {
|
||||
/**
 * Send a message and stream the model's reply as Server-Sent Events.
 *
 * Flow, as implemented below:
 *  1. Validate the body and resolve (or create) the conversation.
 *  2. Build reference context: the full text of the selected documents when
 *     `fullTextDocumentIds` is non-empty, otherwise knowledge-base search
 *     results (optionally restricted to `documentIds`).
 *  3. Assemble system prompt + the latest 20 history messages + this message.
 *  4. Stream adapter chunks as `data: <json>` events, append the citation
 *     list, persist both messages, then emit `data: [DONE]`.
 *
 * Failures before the SSE head is written are answered with a JSON 4xx/5xx
 * body. Failures after that point are reported in-stream as an event with
 * `error: true` followed by `[DONE]`, because the status line can no longer
 * be changed once the head is out.
 *
 * @param request - Fastify request carrying a {@link SendChatMessageBody}.
 * @param reply - Fastify reply; the stream is written through `reply.raw`.
 */
async sendMessageStream(
  request: FastifyRequest<{ Body: SendChatMessageBody }>,
  reply: FastifyReply
) {
  // Local closures (not methods) so the handler does not depend on `this`.

  /** Write the SSE response head once; later calls are no-ops. */
  const startSse = (): void => {
    if (reply.raw.headersSent) return;
    reply.raw.writeHead(200, {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      Connection: 'keep-alive',
      'Access-Control-Allow-Origin': '*',
    });
  };

  /** Emit one SSE `data:` event with a JSON payload. */
  const writeEvent = (payload: unknown): void => {
    reply.raw.write(`data: ${JSON.stringify(payload)}\n\n`);
  };

  /** Emit the terminating `[DONE]` event and close the stream. */
  const finishSse = (): void => {
    reply.raw.write('data: [DONE]\n\n');
    reply.raw.end();
  };

  try {
    // TODO: derive userId from the JWT token
    const userId = 'user-mock-001';

    const { content, modelType, knowledgeBaseIds, documentIds, fullTextDocumentIds, conversationId } = request.body;

    console.log('💬 [ChatController] 收到通用对话请求', {
      content,
      modelType,
      knowledgeBaseIds: knowledgeBaseIds || [],
      documentIds: documentIds || [],
      fullTextDocumentIds: fullTextDocumentIds || [],
      conversationId,
    });

    // Reject a missing/empty message up front; otherwise `content.substring`
    // below throws and the client gets an opaque 500.
    if (typeof content !== 'string' || content.length === 0) {
      reply.code(400).send({
        success: false,
        message: '消息内容不能为空',
      });
      return;
    }

    // Validate modelType
    if (modelType !== 'deepseek-v3' && modelType !== 'qwen3-72b' && modelType !== 'qwen-long' && modelType !== 'gemini-pro') {
      reply.code(400).send({
        success: false,
        message: `不支持的模型类型: ${modelType}`,
      });
      return;
    }

    const isFullTextMode = !!fullTextDocumentIds && fullTextDocumentIds.length > 0;

    // Resolve or create the conversation record
    let conversation;
    if (conversationId) {
      // The conversation must exist, belong to this user and not be deleted
      conversation = await prisma.generalConversation.findFirst({
        where: {
          id: conversationId,
          userId,
          deletedAt: null,
        },
      });

      if (!conversation) {
        reply.code(404).send({
          success: false,
          message: '对话不存在',
        });
        return;
      }
    } else {
      conversation = await prisma.generalConversation.create({
        data: {
          userId,
          title: content.substring(0, 50), // first 50 characters of the first message
          modelName: modelType,
        },
      });
      console.log('✅ [ChatController] 创建新对话', { conversationId: conversation.id });
    }

    // Reference context handed to the model, plus the citations behind it
    let knowledgeBaseContext = '';
    const allCitations: Citation[] = [];
    let citationCounter = 1; // numbering shared across all knowledge bases

    // Phase 2: full-text reading mode - pass complete document texts
    if (fullTextDocumentIds && fullTextDocumentIds.length > 0) {
      console.log('📚 [ChatController] 全文阅读模式 - 加载文献全文', {
        documentCount: fullTextDocumentIds.length,
      });

      try {
        // NOTE(review): this query filters by id only; it is not scoped to
        // userId or a knowledge base - confirm ownership is enforced elsewhere.
        const documents = await prisma.document.findMany({
          where: {
            id: { in: fullTextDocumentIds },
          },
          select: {
            id: true,
            filename: true,
            extractedText: true,
            tokensCount: true,
          },
          orderBy: {
            filename: 'asc',
          },
        });

        console.log(`📄 [ChatController] 加载了 ${documents.length} 篇文献全文`);

        // Drop documents without extracted text
        const validDocuments = documents.filter(doc => doc.extractedText && doc.extractedText.trim().length > 0);

        if (validDocuments.length === 0) {
          console.warn('⚠️ [ChatController] 所有文档都没有提取文本,无法使用全文模式');
        } else if (validDocuments.length < documents.length) {
          console.warn(`⚠️ [ChatController] ${documents.length - validDocuments.length} 篇文档没有提取文本,已跳过`);
        }

        // One section per document, each introduced by an explicit marker
        const fullTextParts: string[] = [];

        for (let i = 0; i < validDocuments.length; i++) {
          const doc = validDocuments[i];
          const docNumber = i + 1;

          allCitations.push({
            id: docNumber,
            fileName: doc.filename,
            position: 0, // full text has no segment position
            score: 1.0, // full-text mode is reported as 100% relevant
            content: doc.extractedText?.substring(0, 200) || '(无内容)',
          });

          fullTextParts.push(
            `【文献${docNumber}:${doc.filename}】\n\n${doc.extractedText}`
          );
        }

        knowledgeBaseContext = fullTextParts.join('\n\n---\n\n');

        const totalTokens = validDocuments.reduce((sum, doc) => sum + (doc.tokensCount || 0), 0);

        console.log(`📚 [ChatController] 全文上下文已组装`, {
          totalDocuments: validDocuments.length,
          totalCharacters: knowledgeBaseContext.length,
          totalTokens: totalTokens,
          estimatedTokens: Math.round(knowledgeBaseContext.length / 2.5), // rough estimate
        });

        // Token budget check. The limit is the Qwen-Long input limit and is
        // applied regardless of the selected modelType.
        const QWEN_LONG_INPUT_LIMIT = 1000000;
        const SYSTEM_OVERHEAD = 10000; // system prompt, formatting, etc.
        const SAFE_INPUT_LIMIT = QWEN_LONG_INPUT_LIMIT - SYSTEM_OVERHEAD;

        if (totalTokens > SAFE_INPUT_LIMIT) {
          const errorMsg = `输入Token数量 (${totalTokens}) 超出Qwen-Long模型限制 (${SAFE_INPUT_LIMIT})。请减少文献数量后重试。`;
          console.error(`❌ [ChatController] ${errorMsg}`);

          // The SSE head must precede the first event; without it the body
          // goes out with implicit headers and no text/event-stream type.
          startSse();
          writeEvent({
            content: `\n\n⚠️ **Token数量超限**\n\n${errorMsg}\n\n**建议**:\n- 当前选中 ${validDocuments.length} 篇文献,共 ${totalTokens.toLocaleString()} tokens\n- 请减少到 ${Math.floor(validDocuments.length * SAFE_INPUT_LIMIT / totalTokens)} 篇以内\n- 或使用"逐篇精读"模式深入分析单篇文献`,
            role: 'assistant',
            error: true,
          });
          finishSse();
          // Return nothing: a returned ServerResponse would be treated by
          // Fastify as a payload to send on an already finished reply.
          return;
        }

        // Warn when close to the limit
        if (totalTokens > SAFE_INPUT_LIMIT * 0.8) {
          console.warn(`⚠️ [ChatController] Token数量接近限制 (${totalTokens}/${SAFE_INPUT_LIMIT}), 建议减少文献数量`);
        }
      } catch (error) {
        console.error('❌ [ChatController] 加载文献全文失败:', error);
        // Deliberately not rethrown: the chat continues without full text.
      }
    }
    // Search mode (per-document reading or general chat)
    else if (knowledgeBaseIds && knowledgeBaseIds.length > 0) {
      console.log('📚 [ChatController] 开始检索知识库');
      const knowledgeResults: string[] = [];

      // Phase 2: when document ids are given, fetch more hits so that enough
      // remain after filtering.
      const topK = documentIds && documentIds.length > 0 ? 50 : 15;

      for (const kbId of knowledgeBaseIds) {
        try {
          const searchResult = await knowledgeBaseService.searchKnowledgeBase(
            userId,
            kbId,
            content,
            topK
          );

          if (searchResult.records && searchResult.records.length > 0) {
            let records = searchResult.records;

            // Phase 2: per-document reading mode - keep only hits from the
            // requested documents
            if (documentIds && documentIds.length > 0) {
              console.log(`🔍 [ChatController] 逐篇精读模式 - 过滤文档`, { documentIds });

              // Map local document ids to their Dify document ids
              const documents = await prisma.document.findMany({
                where: {
                  id: { in: documentIds },
                  knowledgeBase: {
                    id: kbId,
                  },
                },
                select: {
                  id: true,
                  filename: true,
                  difyDocumentId: true,
                },
              });

              const difyDocIds = documents.map(d => d.difyDocumentId).filter(Boolean);
              console.log(`📄 [ChatController] 目标Dify文档ID:`, difyDocIds);

              const beforeCount = records.length;
              records = records.filter((record: any) => {
                const docId = record.segment?.document?.id || record.document_id;
                return docId && difyDocIds.includes(docId);
              });

              console.log(`✂️ [ChatController] 过滤结果: ${beforeCount} → ${records.length}`);

              if (records.length === 0) {
                console.warn('⚠️ [ChatController] 过滤后没有结果,可能是文档ID不匹配');
              }

              // Keep the first 15 only
              records = records.slice(0, 15);
            }

            if (records.length > 0) {
              const kbInfo = await prisma.knowledgeBase.findUnique({
                where: { id: kbId },
                select: { name: true },
              });

              // Each hit is tagged [来源N] so the model can cite it
              const formattedResult = `【知识库:${kbInfo?.name || '未命名'}】\n` +
                records
                  .map((record: any) => {
                    const citationId = citationCounter++;
                    const score = (record.score * 100).toFixed(1);

                    allCitations.push({
                      id: citationId,
                      fileName: record.segment?.document?.name || record.document_name || '未知文档',
                      position: record.segment?.position || record.segment_position || 0,
                      score: record.score,
                      content: record.segment?.content || record.content || '',
                    });

                    return `[来源${citationId}] [相关度${score}%]\n${record.segment?.content || record.content}`;
                  })
                  .join('\n\n');

              knowledgeResults.push(formattedResult);
            }
          }
        } catch (error) {
          // One failing knowledge base does not abort the others
          console.error(`❌ [ChatController] 检索知识库失败 ${kbId}:`, error);
        }
      }

      if (knowledgeResults.length > 0) {
        knowledgeBaseContext = knowledgeResults.join('\n\n---\n\n');
        console.log(`💾 [ChatController] 知识库上下文: ${knowledgeBaseContext.length} 字符`);
        console.log(`📚 [ChatController] 收集到 ${allCitations.length} 个引用`);
      }
    }

    // Latest 20 history messages, restored to chronological order
    const historyMessages = await prisma.generalMessage.findMany({
      where: {
        conversationId: conversation.id,
      },
      orderBy: {
        createdAt: 'desc',
      },
      take: 20,
    });
    historyMessages.reverse();
    console.log(`📜 [ChatController] 历史消息数: ${historyMessages.length}`);

    // System prompt (full-text mode has its own)
    let systemPrompt = '你是一个专业、友好的AI助手。当用户提供参考资料时,请优先基于参考资料回答。';

    if (isFullTextMode) {
      systemPrompt = '你是一个专业的学术文献分析助手。用户会提供多篇文献的完整全文,每篇文献用【文献N:文件名】标记。请认真阅读所有文献,进行深入的综合分析。在回答时请引用具体文献,使用【文献N】格式。你的优势是能够看到所有文献的全貌,进行跨文献的比较、归纳和总结。';
    }

    const messages: any[] = [
      {
        role: 'system',
        content: systemPrompt,
      },
    ];

    for (const msg of historyMessages) {
      messages.push({
        role: msg.role,
        content: msg.content,
      });
    }

    // Current user message, with the reference context appended when present
    let userContent = content;
    if (knowledgeBaseContext) {
      if (isFullTextMode) {
        userContent = `${content}\n\n## 参考资料(文献全文)\n\n**重要提示**:下面提供的是完整的文献全文。每篇文献用【文献N:文件名】标记。请在回答时引用文献,格式如"根据【文献1】..."或"研究表明【文献2】【文献3】..."。你可以综合分析所有文献,进行跨文献的比较和总结。\n\n${knowledgeBaseContext}`;
      } else {
        userContent = `${content}\n\n## 参考资料(来自知识库)\n\n**重要提示**:下面提供的文献片段已经用[来源N]进行了标记。请在回答中引用具体来源时使用对应的编号,如"根据[来源1]..."或"研究表明[来源3][来源5]..."。系统会在你回答结束后自动显示完整的引用清单。\n\n${knowledgeBaseContext}`;
      }
    }
    messages.push({
      role: 'user',
      content: userContent,
    });

    // From here on the response is an event stream
    startSse();

    // Persist the user message (the raw text, without the appended context)
    await prisma.generalMessage.create({
      data: {
        conversationId: conversation.id,
        role: 'user',
        content,
        metadata: {
          knowledgeBaseIds,
        },
      },
    });

    const adapter = LLMFactory.getAdapter(modelType);
    let fullContent = '';
    let usage: any = null;

    // Phase 2: full-text mode gets a larger output budget
    const maxOutputTokens = isFullTextMode ? 6000 : 2000;

    console.log(`🤖 [ChatController] 开始调用LLM`, {
      model: modelType,
      maxOutputTokens,
      mode: isFullTextMode ? '全文阅读' : '其他',
    });

    for await (const chunk of adapter.chatStream(messages, {
      temperature: 0.7,
      maxTokens: maxOutputTokens,
    })) {
      // Presumably some chunks (e.g. a usage-only one) can lack `content`;
      // guard so the saved text never contains the string "undefined".
      fullContent += chunk.content ?? '';

      if (chunk.usage) {
        usage = chunk.usage;
      }

      writeEvent(chunk);
    }

    // Append the reference list after the model's answer, and stream it too
    if (allCitations.length > 0) {
      console.log(`📚 [ChatController] 追加 ${allCitations.length} 个引用清单`);
      const citationsText = formatCitations(allCitations);
      fullContent += citationsText;

      const citationChunk = {
        content: citationsText,
        role: 'assistant' as const,
      };
      writeEvent(citationChunk);
    }

    // Persist the assistant message
    await prisma.generalMessage.create({
      data: {
        conversationId: conversation.id,
        role: 'assistant',
        content: fullContent,
        model: modelType,
        tokens: usage?.totalTokens,
        metadata: {
          usage,
        },
      },
    });

    // Touch the conversation
    await prisma.generalConversation.update({
      where: { id: conversation.id },
      data: {
        updatedAt: new Date(),
      },
    });

    finishSse();

    console.log('✅ [ChatController] 对话完成');
  } catch (error: unknown) {
    console.error('❌ [ChatController] 错误:', error);
    const message = (error as { message?: string } | null)?.message || '服务器错误';

    if (reply.raw.headersSent) {
      // The stream already started: a JSON 500 can no longer be sent, so the
      // failure is reported in-stream and the stream is closed.
      if (!reply.raw.writableEnded) {
        writeEvent({ content: message, role: 'assistant', error: true });
        finishSse();
      }
      return;
    }

    reply.code(500).send({
      success: false,
      message,
    });
  }
}
|
||||
|
||||
/**
 * List the current user's conversations.
 *
 * Returns up to 50 non-deleted conversations, most recently updated first,
 * as `{ success: true, data }`. Any failure is logged and answered with a
 * 500 JSON body.
 *
 * @param _request - Unused.
 * @param reply - Fastify reply used to send the JSON response.
 */
async getConversations(
  _request: FastifyRequest,
  reply: FastifyReply
) {
  try {
    // Hard-coded user id, as in the other handlers of this controller.
    const userId = 'user-mock-001';

    const conversations = await prisma.generalConversation.findMany({
      where: {
        userId,
        deletedAt: null,
      },
      orderBy: {
        updatedAt: 'desc',
      },
      take: 50,
    });

    reply.send({
      success: true,
      data: conversations,
    });
  } catch (error: unknown) {
    // Log the failure so it is not swallowed silently.
    console.error('❌ [ChatController] 获取对话列表失败:', error);
    reply.code(500).send({
      success: false,
      message: (error as { message?: string } | null)?.message || '获取对话列表失败',
    });
  }
}
|
||||
|
||||
/**
 * Soft-delete a conversation by setting its `deletedAt` timestamp.
 *
 * The update is keyed on both `id` and `userId`, so a conversation that
 * does not exist or belongs to another user matches nothing. That case is
 * answered with 404; every other failure is logged and answered with 500.
 *
 * @param request - Fastify request; `params.id` is the conversation id.
 * @param reply - Fastify reply used to send the JSON response.
 */
async deleteConversation(
  request: FastifyRequest<{ Params: { id: string } }>,
  reply: FastifyReply
) {
  try {
    // Hard-coded user id, as in the other handlers of this controller.
    const userId = 'user-mock-001';
    const { id } = request.params;

    await prisma.generalConversation.update({
      where: {
        id,
        userId,
      },
      data: {
        deletedAt: new Date(),
      },
    });

    reply.send({
      success: true,
      message: '删除成功',
    });
  } catch (error: unknown) {
    const details = error as { code?: string; message?: string } | null;

    // Prisma reports "record to update not found" as P2025; that is a
    // client-side condition, not a server error.
    if (details?.code === 'P2025') {
      reply.code(404).send({
        success: false,
        message: '对话不存在',
      });
      return;
    }

    console.error('❌ [ChatController] 删除对话失败:', error);
    reply.code(500).send({
      success: false,
      message: details?.message || '删除失败',
    });
  }
}
|
||||
}
|
||||
|
||||
// Module-level ChatController instance exported for use by other modules.
export const chatController = new ChatController();
|
||||
|
||||
|
||||
@@ -131,7 +131,7 @@ export class ConversationController {
|
||||
request.body;
|
||||
|
||||
// 验证modelType
|
||||
if (modelType !== 'deepseek-v3' && modelType !== 'qwen3-72b' && modelType !== 'gemini-pro') {
|
||||
if (modelType !== 'deepseek-v3' && modelType !== 'qwen3-72b' && modelType !== 'qwen-long' && modelType !== 'gemini-pro') {
|
||||
reply.code(400).send({
|
||||
success: false,
|
||||
message: `不支持的模型类型: ${modelType}`,
|
||||
@@ -183,7 +183,7 @@ export class ConversationController {
|
||||
request.body;
|
||||
|
||||
// 验证modelType
|
||||
if (modelType !== 'deepseek-v3' && modelType !== 'qwen3-72b' && modelType !== 'gemini-pro') {
|
||||
if (modelType !== 'deepseek-v3' && modelType !== 'qwen3-72b' && modelType !== 'qwen-long' && modelType !== 'gemini-pro') {
|
||||
reply.code(400).send({
|
||||
success: false,
|
||||
message: `不支持的模型类型: ${modelType}`,
|
||||
@@ -257,4 +257,58 @@ export async function reprocessDocument(
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Phase 2: return a document together with its extracted full text
 * (used by the per-document reading mode).
 *
 * Responds with `{ success: true, data }` on success, 404 when the service
 * error message contains "not found", and 500 for any other failure.
 *
 * @param request - Fastify request; `params.id` is the document id.
 * @param reply - Fastify reply used to send the JSON response.
 */
export async function getDocumentFullText(
  request: FastifyRequest<{
    Params: {
      id: string;
    };
  }>,
  reply: FastifyReply
) {
  try {
    const { id } = request.params;

    const document = await documentService.getDocumentById(MOCK_USER_ID, id);

    // NOTE(review): the extraction fields are read through an `any` cast,
    // which suggests they are missing from the service's return type - confirm.
    const extraction = document as any;

    return reply.send({
      success: true,
      data: {
        documentId: document.id,
        filename: document.filename,
        fileType: document.fileType,
        fileSizeBytes: document.fileSizeBytes,
        extractedText: extraction.extractedText || null,
        // `??` rather than `||`: a count of 0 is a real value, not "missing".
        charCount: extraction.charCount ?? null,
        tokensCount: document.tokensCount ?? null,
        extractionMethod: extraction.extractionMethod || null,
        extractionQuality: extraction.extractionQuality || null,
        language: extraction.language || null,
        metadata: {
          uploadedAt: document.uploadedAt,
          processedAt: document.processedAt,
          status: document.status,
        },
      },
    });
  } catch (error: unknown) {
    console.error('Failed to get document full text:', error);

    // Read the message defensively: a thrown value without a string
    // `message` must not make the error handler itself throw.
    const rawMessage = (error as { message?: unknown } | null)?.message;
    const message = typeof rawMessage === 'string' ? rawMessage : '';

    if (message.includes('not found')) {
      return reply.status(404).send({
        success: false,
        message,
      });
    }

    return reply.status(500).send({
      success: false,
      message: message || 'Failed to get document full text',
    });
  }
}
|
||||
|
||||
|
||||
@@ -219,7 +219,7 @@ export async function searchKnowledgeBase(
|
||||
});
|
||||
}
|
||||
|
||||
const topK = top_k ? parseInt(top_k, 10) : 3;
|
||||
const topK = top_k ? parseInt(top_k, 10) : 15; // Phase 1优化:默认从3增加到15
|
||||
|
||||
const results = await knowledgeBaseService.searchKnowledgeBase(
|
||||
MOCK_USER_ID,
|
||||
@@ -289,3 +289,53 @@ export async function getKnowledgeBaseStats(
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Phase 2 (full-text reading mode): get the document selection for a
 * knowledge base.
 *
 * `max_files` and `max_tokens` are optional query parameters parsed as
 * base-10 integers and forwarded to the service; when absent, `undefined`
 * is forwarded. A value that does not parse to a number is rejected with
 * 400 instead of being forwarded as NaN.
 *
 * Responds with `{ success: true, data }` on success, 404 when the service
 * error message contains "not found", and 500 for any other failure.
 *
 * @param request - Fastify request; `params.id` is the knowledge base id.
 * @param reply - Fastify reply used to send the JSON response.
 */
export async function getDocumentSelection(
  request: FastifyRequest<{
    Params: {
      id: string;
    };
    Querystring: {
      max_files?: string;
      max_tokens?: string;
    };
  }>,
  reply: FastifyReply
) {
  try {
    const { id } = request.params;
    const { max_files, max_tokens } = request.query;

    const maxFiles = max_files ? parseInt(max_files, 10) : undefined;
    const maxTokens = max_tokens ? parseInt(max_tokens, 10) : undefined;

    const hasInvalidLimit = [maxFiles, maxTokens].some(
      (value) => value !== undefined && Number.isNaN(value)
    );
    if (hasInvalidLimit) {
      return reply.status(400).send({
        success: false,
        message: 'max_files and max_tokens must be integers',
      });
    }

    const selection = await knowledgeBaseService.getDocumentSelection(
      MOCK_USER_ID,
      id,
      maxFiles,
      maxTokens
    );

    return reply.send({
      success: true,
      data: selection,
    });
  } catch (error: unknown) {
    console.error('Failed to get document selection:', error);

    // Read the message defensively: a thrown value without a string
    // `message` must not make the error handler itself throw.
    const rawMessage = (error as { message?: unknown } | null)?.message;
    const message = typeof rawMessage === 'string' ? rawMessage : '';

    if (message.includes('not found')) {
      return reply.status(404).send({
        success: false,
        message,
      });
    }

    return reply.status(500).send({
      success: false,
      message: message || 'Failed to get document selection',
    });
  }
}
|
||||
|
||||
292
backend/src/legacy/controllers/reviewController.ts
Normal file
292
backend/src/legacy/controllers/reviewController.ts
Normal file
@@ -0,0 +1,292 @@
|
||||
import type { FastifyRequest, FastifyReply } from 'fastify';
|
||||
import * as reviewService from '../services/reviewService.js';
|
||||
import { ModelType } from '../../common/llm/adapters/types.js';
|
||||
|
||||
// Mock用户ID(实际应从JWT token中获取)
|
||||
const MOCK_USER_ID = 'user-mock-001';
|
||||
|
||||
/**
 * Upload a manuscript and start its review.
 * POST /api/review/upload
 *
 * Reads one uploaded file, enforces a 5MB size limit and a Word-only MIME
 * type list, resolves the model from the optional `modelType` form field
 * (default `deepseek-v3`), then hands the buffer to the review service.
 *
 * Responds with 400 for a missing file, oversized file, unsupported file
 * type or invalid model; 500 for any other failure.
 *
 * @param request - Fastify multipart request.
 * @param reply - Fastify reply used to send the JSON response.
 */
export async function uploadManuscript(
  request: FastifyRequest<{
    Body: {
      modelType?: string;
    };
  }>,
  reply: FastifyReply
) {
  try {
    console.log('📤 开始上传稿件进行审查...');

    // The uploaded file part
    const data = await request.file();

    if (!data) {
      console.error('❌ 没有接收到文件');
      return reply.status(400).send({
        success: false,
        message: 'No file uploaded',
      });
    }

    console.log(`📄 接收到文件: ${data.filename}, 类型: ${data.mimetype}`);

    const file = await data.toBuffer();
    const filename = data.filename;
    const fileType = data.mimetype;
    const fileSizeBytes = file.length;

    // Size limit (5MB)
    const maxSize = 5 * 1024 * 1024;
    console.log(`📊 文件大小: ${(fileSizeBytes / 1024 / 1024).toFixed(2)}MB (限制: 5MB)`);

    if (fileSizeBytes > maxSize) {
      console.error(`❌ 文件太大: ${(fileSizeBytes / 1024 / 1024).toFixed(2)}MB`);
      return reply.status(400).send({
        success: false,
        message: 'File size exceeds 5MB limit',
      });
    }

    // Type limit (Word documents only)
    const allowedTypes = [
      'application/msword', // .doc
      'application/vnd.openxmlformats-officedocument.wordprocessingml.document', // .docx
    ];

    console.log(`🔍 检查文件类型: ${fileType}`);
    if (!allowedTypes.includes(fileType)) {
      console.error(`❌ 不支持的文件类型: ${fileType}`);
      return reply.status(400).send({
        success: false,
        message: 'File type not supported. Only Word documents (.doc, .docx) are allowed',
      });
    }

    // Model type (default deepseek-v3). A multipart field entry may be a
    // single part or an array of parts, and only value parts carry `value`,
    // so narrow to a plain string before using it.
    const modelField = data.fields.modelType;
    const requestedModel =
      modelField &&
      !Array.isArray(modelField) &&
      'value' in modelField &&
      typeof modelField.value === 'string'
        ? modelField.value
        : undefined;
    const modelType = (requestedModel || 'deepseek-v3') as ModelType;

    // Validate the model type
    const validModels: ModelType[] = ['deepseek-v3', 'qwen3-72b', 'qwen-long'];
    if (!validModels.includes(modelType)) {
      return reply.status(400).send({
        success: false,
        message: `Invalid model type. Allowed: ${validModels.join(', ')}`,
      });
    }

    console.log(`🤖 使用模型: ${modelType}`);

    // Hand over to the service layer
    console.log('⚙️ 调用审查服务...');
    const task = await reviewService.reviewManuscript(file, filename, MOCK_USER_ID, modelType);

    console.log(`✅ 审查任务已创建: ${task.id}`);

    return reply.send({
      success: true,
      message: 'Manuscript uploaded successfully. Review task created.',
      data: {
        taskId: task.id,
        fileName: task.fileName,
        status: task.status,
        createdAt: task.createdAt,
      },
    });
  } catch (error) {
    console.error('❌ 上传稿件失败:', error);
    return reply.status(500).send({
      success: false,
      message: error instanceof Error ? error.message : 'Upload failed',
    });
  }
}
|
||||
|
||||
/**
 * Get the status of a review task.
 * GET /api/review/tasks/:taskId
 *
 * Responds with `{ success: true, data }` describing the task. Every
 * failure raised by the service is answered with 404.
 *
 * @param request - Fastify request; `params.taskId` is the task id.
 * @param reply - Fastify reply used to send the JSON response.
 */
export async function getTaskStatus(
  request: FastifyRequest<{
    Params: {
      taskId: string;
    };
  }>,
  reply: FastifyReply
) {
  try {
    const taskId = request.params.taskId;
    console.log(`🔍 查询任务状态: ${taskId}`);

    const task = await reviewService.getReviewTask(MOCK_USER_ID, taskId);
    console.log(`✅ 任务状态: ${task.status}`);

    // Expose only the status-related fields of the task.
    const data = {
      id: task.id,
      fileName: task.fileName,
      fileSize: task.fileSize,
      status: task.status,
      wordCount: task.wordCount,
      overallScore: task.overallScore,
      modelUsed: task.modelUsed,
      createdAt: task.createdAt,
      startedAt: task.startedAt,
      completedAt: task.completedAt,
      durationSeconds: task.durationSeconds,
      errorMessage: task.errorMessage,
    };

    return reply.send({ success: true, data });
  } catch (error) {
    console.error('❌ 查询任务状态失败:', error);

    const message = error instanceof Error ? error.message : 'Task not found';
    return reply.status(404).send({ success: false, message });
  }
}
|
||||
|
||||
/**
 * Get the review report of a task.
 * GET /api/review/tasks/:taskId/report
 *
 * Responds with `{ success: true, data: report }`. When the service error
 * message contains "not ready yet" the answer is 202; every other failure
 * is answered with 404.
 *
 * @param request - Fastify request; `params.taskId` is the task id.
 * @param reply - Fastify reply used to send the JSON response.
 */
export async function getTaskReport(
  request: FastifyRequest<{
    Params: {
      taskId: string;
    };
  }>,
  reply: FastifyReply
) {
  try {
    const taskId = request.params.taskId;
    console.log(`📊 获取审查报告: ${taskId}`);

    const report = await reviewService.getReviewReport(MOCK_USER_ID, taskId);
    console.log(`✅ 报告已生成`);

    return reply.send({ success: true, data: report });
  } catch (error) {
    console.error('❌ 获取报告失败:', error);

    if (!(error instanceof Error)) {
      return reply.status(404).send({ success: false, message: 'Report not found' });
    }

    // A report that is still being produced is signalled with 202.
    const statusCode = error.message.includes('not ready yet') ? 202 : 404;
    return reply.status(statusCode).send({ success: false, message: error.message });
  }
}
|
||||
|
||||
/**
 * Get the paginated list of review tasks.
 * GET /api/review/tasks
 *
 * `page` (default 1) and `limit` (default 20) are read from the query
 * string. Values that are missing, non-numeric or not positive fall back
 * to the default instead of reaching the service as NaN or 0.
 *
 * @param request - Fastify request with optional `page` / `limit` query values.
 * @param reply - Fastify reply used to send the JSON response.
 */
export async function getTaskList(
  request: FastifyRequest<{
    Querystring: {
      page?: string;
      limit?: string;
    };
  }>,
  reply: FastifyReply
) {
  /** Parse a positive base-10 integer, or return `fallback`. */
  const toPositiveInt = (raw: string | undefined, fallback: number): number => {
    const parsed = parseInt(raw || '', 10);
    return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
  };

  try {
    const page = toPositiveInt(request.query.page, 1);
    const limit = toPositiveInt(request.query.limit, 20);

    console.log(`📋 获取任务列表: page=${page}, limit=${limit}`);

    const result = await reviewService.getReviewTasks(MOCK_USER_ID, page, limit);

    console.log(`✅ 找到 ${result.tasks.length} 个任务`);

    return reply.send({
      success: true,
      data: result.tasks,
      pagination: result.pagination,
    });
  } catch (error) {
    console.error('❌ 获取任务列表失败:', error);
    return reply.status(500).send({
      success: false,
      message: error instanceof Error ? error.message : 'Failed to get task list',
    });
  }
}
|
||||
|
||||
/**
 * Delete a review task.
 * DELETE /api/review/tasks/:taskId
 *
 * Responds with `{ success: true, message }` once the service call
 * resolves. Every failure raised by the service is answered with 404.
 *
 * @param request - Fastify request; `params.taskId` is the task id.
 * @param reply - Fastify reply used to send the JSON response.
 */
export async function deleteTask(
  request: FastifyRequest<{
    Params: {
      taskId: string;
    };
  }>,
  reply: FastifyReply
) {
  try {
    const taskId = request.params.taskId;
    console.log(`🗑️ 删除任务: ${taskId}`);

    await reviewService.deleteReviewTask(MOCK_USER_ID, taskId);
    console.log(`✅ 任务已删除`);

    return reply.send({ success: true, message: 'Task deleted successfully' });
  } catch (error) {
    console.error('❌ 删除任务失败:', error);

    const message = error instanceof Error ? error.message : 'Failed to delete task';
    return reply.status(404).send({ success: false, message });
  }
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
38
backend/src/legacy/routes/batchRoutes.ts
Normal file
38
backend/src/legacy/routes/batchRoutes.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
/**
|
||||
* Phase 3: 批处理模式 - 路由配置
|
||||
*/
|
||||
|
||||
import { FastifyInstance } from 'fastify';
|
||||
import {
|
||||
executeBatch,
|
||||
getTask,
|
||||
getTaskResults,
|
||||
retryFailed,
|
||||
getTemplates,
|
||||
} from '../controllers/batchController.js';
|
||||
|
||||
/**
 * Register the batch-processing routes on the given Fastify instance.
 *
 * @param fastify - Fastify instance the routes are attached to.
 */
export async function batchRoutes(fastify: FastifyInstance) {
  // Task lifecycle: start a batch, then poll its status and results.
  fastify.post('/batch/execute', executeBatch);
  fastify.get('/batch/tasks/:taskId', getTask);
  fastify.get('/batch/tasks/:taskId/results', getTaskResults);

  // Re-run only the documents that failed.
  fastify.post('/batch/tasks/:taskId/retry-failed', retryFailed);

  // List all preset templates.
  fastify.get('/batch/templates', getTemplates);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -12,3 +12,4 @@ export async function chatRoutes(fastify: FastifyInstance) {
|
||||
fastify.delete('/chat/conversations/:id', chatController.deleteConversation.bind(chatController));
|
||||
}
|
||||
|
||||
|
||||
@@ -26,6 +26,9 @@ export default async function knowledgeBaseRoutes(fastify: FastifyInstance) {
|
||||
// 获取知识库统计信息
|
||||
fastify.get('/knowledge-bases/:id/stats', knowledgeBaseController.getKnowledgeBaseStats);
|
||||
|
||||
// Phase 2: 获取文档选择(全文阅读模式)
|
||||
fastify.get('/knowledge-bases/:id/document-selection', knowledgeBaseController.getDocumentSelection);
|
||||
|
||||
// ==================== 文档管理 API ====================
|
||||
|
||||
// 上传文档
|
||||
@@ -37,6 +40,9 @@ export default async function knowledgeBaseRoutes(fastify: FastifyInstance) {
|
||||
// 获取文档详情
|
||||
fastify.get('/documents/:id', documentController.getDocumentById);
|
||||
|
||||
// Phase 2: 获取文档全文
|
||||
fastify.get('/documents/:id/full-text', documentController.getDocumentFullText);
|
||||
|
||||
// 删除文档
|
||||
fastify.delete('/documents/:id', documentController.deleteDocument);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
|
||||
import { projectController } from '../controllers/projectController.js';
|
||||
import { validateProjectCreate, validateProjectUpdate } from '../middleware/validateProject.js';
|
||||
import { validateProjectCreate, validateProjectUpdate } from '../../common/middleware/validateProject.js';
|
||||
|
||||
interface ProjectParams {
|
||||
id: string;
|
||||
50
backend/src/legacy/routes/reviewRoutes.ts
Normal file
50
backend/src/legacy/routes/reviewRoutes.ts
Normal file
@@ -0,0 +1,50 @@
|
||||
import type { FastifyInstance } from 'fastify';
|
||||
import * as reviewController from '../controllers/reviewController.js';
|
||||
|
||||
/**
 * Register the manuscript-review routes on the given Fastify instance.
 *
 * @param fastify - Fastify instance the routes are attached to.
 */
export default async function reviewRoutes(fastify: FastifyInstance) {
  const {
    uploadManuscript,
    getTaskStatus,
    getTaskReport,
    getTaskList,
    deleteTask,
  } = reviewController;

  // Upload a manuscript and start its review.
  fastify.post('/review/upload', uploadManuscript);

  // Single task: status and report.
  fastify.get('/review/tasks/:taskId', getTaskStatus);
  fastify.get('/review/tasks/:taskId/report', getTaskReport);

  // Task collection: list and delete.
  fastify.get('/review/tasks', getTaskList);
  fastify.delete('/review/tasks/:taskId', deleteTask);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -43,8 +43,8 @@ class AgentService {
|
||||
|
||||
constructor() {
|
||||
// 配置文件路径
|
||||
this.configPath = path.resolve(__dirname, '../../config/agents.yaml');
|
||||
this.promptsPath = path.resolve(__dirname, '../../prompts');
|
||||
this.configPath = path.resolve(__dirname, '../../../config/agents.yaml');
|
||||
this.promptsPath = path.resolve(__dirname, '../../../prompts');
|
||||
|
||||
// 初始化加载配置
|
||||
this.loadAgents();
|
||||
420
backend/src/legacy/services/batchService.ts
Normal file
420
backend/src/legacy/services/batchService.ts
Normal file
@@ -0,0 +1,420 @@
|
||||
/**
|
||||
* Phase 3: 批处理模式 - 批处理服务
|
||||
*
|
||||
* 核心功能:
|
||||
* 1. 执行批处理任务(3并发)
|
||||
* 2. 处理单个文档
|
||||
* 3. 进度推送(WebSocket)
|
||||
* 4. 错误处理和重试
|
||||
*/
|
||||
|
||||
import PQueue from 'p-queue';
|
||||
import { prisma } from '../../config/database.js';
|
||||
import { LLMFactory } from '../../common/llm/adapters/LLMFactory.js';
|
||||
import { ModelType } from '../../common/llm/adapters/types.js';
|
||||
import { getTemplate } from '../templates/clinicalResearch.js';
|
||||
import { parseJSON } from '../../common/utils/jsonParser.js';
|
||||
|
||||
/**
 * Parameters accepted by `executeBatchTask`.
 */
export interface ExecuteBatchTaskParams {
  userId: string;
  kbId: string;
  documentIds: string[];
  templateType: 'preset' | 'custom';
  /** Preset template id. */
  templateId?: string;
  /** Custom prompt text. */
  customPrompt?: string;
  modelType: ModelType;
  taskName?: string;
  /** Id of an already existing task (optional). */
  existingTaskId?: string;
  /** Optional callback that receives progress updates. */
  onProgress?: (progress: BatchProgress) => void;
}
|
||||
|
||||
/**
 * Progress snapshot handed to the `onProgress` callback.
 */
export interface BatchProgress {
  taskId: string;
  /** Documents finished so far; `executeBatchTask` reports successes plus failures here. */
  completed: number;
  /** Total number of documents in the batch. */
  total: number;
  /** Documents that failed so far. */
  failed: number;
  currentDocument?: string;
  /** Estimated remaining time, in seconds. */
  estimatedSeconds?: number;
}
|
||||
|
||||
/**
 * Summary returned once a batch run has finished.
 */
export interface BatchTaskResult {
  taskId: string;
  status: 'processing' | 'completed' | 'failed';
  /** Number of documents submitted to the run. */
  totalDocuments: number;
  /** Documents processed successfully. */
  completedCount: number;
  /** Documents whose processing failed. */
  failedCount: number;
  /** Wall-clock duration of the run, in seconds. */
  durationSeconds?: number;
}
|
||||
|
||||
/**
 * Runs a batch task: sends every listed document through the LLM with a
 * fixed concurrency of 3, stores one `batchResult` row per document and
 * keeps the `batchTask` counters up to date.
 *
 * A failure on a single document is recorded as a `failed` result and does
 * not abort the run; the task is always marked `completed` at the end.
 *
 * @param params Batch configuration, see {@link ExecuteBatchTaskParams}.
 * @returns Final counters and duration of the run.
 * @throws Error when the document count is outside 3–50, when the template
 *   parameters are missing or unknown, or when `existingTaskId` does not
 *   match a stored task.
 */
export async function executeBatchTask(
  params: ExecuteBatchTaskParams
): Promise<BatchTaskResult> {
  const {
    userId,
    kbId,
    documentIds,
    templateType,
    templateId,
    customPrompt,
    modelType,
    taskName,
    existingTaskId,
    onProgress,
  } = params;

  // Single source of truth for the worker count (persisted on the task and
  // used for the queue).
  const BATCH_CONCURRENCY = 3;

  console.log('📦 [BatchService] 开始执行批处理任务', {
    userId,
    kbId,
    documentCount: documentIds.length,
    templateType,
    modelType,
    existingTaskId: existingTaskId || '新建',
  });

  // Validate the number of documents (3–50).
  if (documentIds.length < 3 || documentIds.length > 50) {
    throw new Error(`文献数量必须在3-50篇之间,当前:${documentIds.length}篇`);
  }

  // Resolve the prompts: either from a preset template or from the caller.
  let systemPrompt: string;
  let userPromptTemplate: string;
  let expectedFields: string[] = [];

  if (templateType === 'preset') {
    if (!templateId) {
      throw new Error('预设模板类型需要提供templateId');
    }

    const template = getTemplate(templateId);
    if (!template) {
      throw new Error(`未找到模板: ${templateId}`);
    }

    systemPrompt = template.systemPrompt;
    userPromptTemplate = template.userPrompt;
    expectedFields = template.outputFields.map(f => f.key);
  } else {
    // Custom template.
    if (!customPrompt) {
      throw new Error('自定义模板需要提供customPrompt');
    }

    systemPrompt = '你是一个专业的文献分析助手。请根据用户的要求分析文献内容。';
    userPromptTemplate = customPrompt;
  }

  // Reuse an existing task record or create a new one.
  let task;
  if (existingTaskId) {
    task = await prisma.batchTask.findUnique({
      where: { id: existingTaskId },
    });
    if (!task) {
      throw new Error(`任务不存在: ${existingTaskId}`);
    }
    console.log(`✅ [BatchService] 使用已存在的任务: ${task.id}`);
  } else {
    task = await prisma.batchTask.create({
      data: {
        userId,
        kbId,
        name: taskName || `批处理任务_${new Date().toLocaleString('zh-CN')}`,
        templateType,
        templateId: templateId || null,
        prompt: userPromptTemplate,
        status: 'processing',
        totalDocuments: documentIds.length,
        completedCount: 0,
        failedCount: 0,
        modelType,
        concurrency: BATCH_CONCURRENCY,
        startedAt: new Date(),
      },
    });
    console.log(`✅ [BatchService] 创建任务记录: ${task.id}`);
  }

  // Captured as a const so the queue closures below do not depend on the
  // narrowing of the reassignable `task` binding.
  const taskId = task.id;

  const startTime = Date.now();
  let completedCount = 0;
  let failedCount = 0;

  const queue = new PQueue({ concurrency: BATCH_CONCURRENCY });

  // Enqueue one job per document.
  const promises = documentIds.map((docId, index) =>
    queue.add(async () => {
      try {
        console.log(`🔄 [BatchService] 处理文档 ${index + 1}/${documentIds.length}: ${docId}`);

        // Load the document.
        const document = await prisma.document.findUnique({
          where: { id: docId },
          select: {
            id: true,
            filename: true,
            extractedText: true,
            tokensCount: true,
          },
        });

        if (!document) {
          throw new Error(`文档不存在: ${docId}`);
        }

        if (!document.extractedText) {
          throw new Error(`文档未提取文本: ${document.filename}`);
        }

        // Run the LLM on this document.
        const result = await processDocument({
          document,
          systemPrompt,
          userPromptTemplate,
          modelType,
          templateType,
          expectedFields,
        });

        // Persist the successful result.
        await prisma.batchResult.create({
          data: {
            taskId,
            documentId: docId,
            status: 'success',
            data: result.data,
            rawOutput: result.rawOutput,
            processingTimeMs: result.processingTimeMs,
            tokensUsed: result.tokensUsed,
          },
        });

        completedCount++;
        console.log(`✅ [BatchService] 文档处理成功: ${document.filename} (${result.processingTimeMs}ms)`);
      } catch (error: unknown) {
        console.error(`❌ [BatchService] 文档处理失败: ${docId}`, error);

        // Count the failure first so the counters stay correct even if the
        // failure record itself cannot be written.
        failedCount++;
        const errorMessage = error instanceof Error ? error.message : String(error);

        try {
          await prisma.batchResult.create({
            data: {
              taskId,
              documentId: docId,
              status: 'failed',
              errorMessage,
            },
          });
        } catch (persistError) {
          console.error(`❌ [BatchService] Failed to persist failure record for ${docId}`, persistError);
        }
      }

      // Push progress to the caller; a faulty callback must not prevent the
      // task counters from being stored below.
      if (onProgress) {
        const processed = completedCount + failedCount;
        const progress: BatchProgress = {
          taskId,
          completed: processed,
          total: documentIds.length,
          failed: failedCount,
          estimatedSeconds: calculateEstimatedTime(
            processed,
            documentIds.length,
            Date.now() - startTime
          ),
        };
        try {
          onProgress(progress);
        } catch (callbackError) {
          console.error('⚠️ [BatchService] onProgress callback threw:', callbackError);
        }
      }

      // Store the running counters on the task.
      await prisma.batchTask.update({
        where: { id: taskId },
        data: {
          completedCount,
          failedCount,
        },
      });
    })
  );

  // Wait for every job; surface unexpected rejections instead of dropping them.
  const settled = await Promise.allSettled(promises);
  for (const outcome of settled) {
    if (outcome.status === 'rejected') {
      console.error('❌ [BatchService] Batch job rejected unexpectedly:', outcome.reason);
    }
  }

  const durationSeconds = Math.round((Date.now() - startTime) / 1000);

  // Mark the task as finished.
  await prisma.batchTask.update({
    where: { id: taskId },
    data: {
      status: 'completed',
      completedAt: new Date(),
      durationSeconds,
    },
  });

  console.log(`🎉 [BatchService] 批处理任务完成: ${taskId}`, {
    total: documentIds.length,
    success: completedCount,
    failed: failedCount,
    durationSeconds,
  });

  return {
    taskId,
    status: 'completed',
    totalDocuments: documentIds.length,
    completedCount,
    failedCount,
    durationSeconds,
  };
}
|
||||
|
||||
/**
 * Sends a single document to the LLM and shapes the reply for storage.
 *
 * For `preset` templates the reply is parsed with `parseJSON` against
 * `expectedFields`; for `custom` templates the raw text is wrapped as
 * `{ extracted_text }`.
 *
 * @param params Document, prompts, model and template settings.
 * @returns Parsed data, the raw LLM output, the time spent on the LLM call
 *   in milliseconds and the token usage when the adapter reports it.
 * @throws Error when a preset reply cannot be parsed.
 */
async function processDocument(params: {
  document: {
    id: string;
    filename: string;
    extractedText: string;
    tokensCount: number | null;
  };
  systemPrompt: string;
  userPromptTemplate: string;
  modelType: ModelType;
  templateType: 'preset' | 'custom';
  expectedFields: string[];
}): Promise<{
  data: any;
  rawOutput: string;
  processingTimeMs: number;
  tokensUsed?: number;
}> {
  const began = Date.now();
  const { document: doc } = params;

  // Prompt first, then the document header, then the full extracted text.
  const prompt = `${params.userPromptTemplate}\n\n【文献:${doc.filename}】\n\n${doc.extractedText}`;

  const reply = await LLMFactory.getAdapter(params.modelType).chat(
    [
      { role: 'system', content: params.systemPrompt },
      { role: 'user', content: prompt },
    ],
    // Low temperature for more stable output.
    { temperature: 0.3, maxTokens: 2000 }
  );

  // Timing covers prompt assembly and the LLM call, not the parsing below.
  const processingTimeMs = Date.now() - began;
  const rawOutput = reply.content;

  if (params.templateType !== 'preset') {
    // Custom template: keep the text as-is.
    return {
      data: { extracted_text: rawOutput },
      rawOutput,
      processingTimeMs,
      tokensUsed: reply.usage?.totalTokens,
    };
  }

  // Preset template: the reply must be JSON.
  const parsed = parseJSON(rawOutput, params.expectedFields);
  if (!parsed.success) {
    throw new Error(`JSON解析失败: ${parsed.error}`);
  }

  return {
    data: parsed.data,
    rawOutput,
    processingTimeMs,
    tokensUsed: reply.usage?.totalTokens,
  };
}
|
||||
|
||||
/**
 * Estimates the remaining time of a batch from the average time spent per
 * finished document.
 *
 * @param completed Number of documents finished so far.
 * @param total Total number of documents in the batch.
 * @param elapsedMs Milliseconds elapsed since the batch started.
 * @returns Estimated remaining seconds, rounded; `0` when nothing has
 *   finished yet, when nothing remains, or when the inputs are not positive
 *   numbers (never negative or NaN).
 */
function calculateEstimatedTime(
  completed: number,
  total: number,
  elapsedMs: number
): number {
  // `!(x > 0)` also rejects NaN, which a plain `x <= 0` would let through.
  if (!(completed > 0) || !(elapsedMs > 0)) return 0;

  const remaining = total - completed;
  if (!(remaining > 0)) return 0;

  const avgTimePerDoc = elapsedMs / completed;
  return Math.round((avgTimePerDoc * remaining) / 1000);
}
|
||||
|
||||
/**
 * Re-runs the documents of a task whose results are marked `failed`.
 *
 * The retry is executed through `executeBatchTask` without an
 * `existingTaskId`, so its results are stored under a new task named
 * `"<original name> (重试)"`. The old `failed` rows are removed only after
 * that run has returned: `executeBatchTask` rejects batches outside 3–50
 * documents, and deleting first would discard the failure history whenever
 * the retry cannot start.
 *
 * @param taskId ID of the task whose failed documents should be retried.
 * @param onProgress Optional progress callback forwarded to the retry run.
 * @returns The number of documents that were retried.
 * @throws Error when the task does not exist, or whatever
 *   `executeBatchTask` throws (the failed rows are then left untouched).
 */
export async function retryFailedDocuments(
  taskId: string,
  onProgress?: (progress: BatchProgress) => void
): Promise<{ retriedCount: number }> {
  console.log(`🔄 [BatchService] 重试失败文档: ${taskId}`);

  // Load the task together with its failed results.
  const task = await prisma.batchTask.findUnique({
    where: { id: taskId },
    include: {
      results: {
        where: { status: 'failed' },
      },
    },
  });

  if (!task) {
    throw new Error(`任务不存在: ${taskId}`);
  }

  const failedDocIds = task.results.map(r => r.documentId);

  if (failedDocIds.length === 0) {
    return { retriedCount: 0 };
  }

  // Run the retry first; if it throws, the failure records survive.
  await executeBatchTask({
    userId: task.userId,
    kbId: task.kbId,
    documentIds: failedDocIds,
    templateType: task.templateType as 'preset' | 'custom',
    templateId: task.templateId || undefined,
    customPrompt: task.templateType === 'custom' ? task.prompt : undefined,
    modelType: task.modelType as ModelType,
    taskName: `${task.name} (重试)`,
    onProgress,
  });

  // The retry ran: drop the old failure records of the original task.
  await prisma.batchResult.deleteMany({
    where: {
      taskId,
      status: 'failed',
    },
  });

  return { retriedCount: failedDocIds.length };
}
|
||||
|
||||
@@ -1,9 +1,78 @@
|
||||
import { prisma } from '../config/database.js';
|
||||
import { LLMFactory } from '../adapters/LLMFactory.js';
|
||||
import { Message, ModelType, StreamChunk } from '../adapters/types.js';
|
||||
import { prisma } from '../../config/database.js';
|
||||
import { LLMFactory } from '../../common/llm/adapters/LLMFactory.js';
|
||||
import { Message, ModelType, StreamChunk } from '../../common/llm/adapters/types.js';
|
||||
import { agentService } from './agentService.js';
|
||||
import * as knowledgeBaseService from './knowledgeBaseService.js';
|
||||
|
||||
/**
 * One knowledge-base passage that was supplied to the model and is listed
 * in the reference section appended to the answer.
 */
interface Citation {
  /** Citation number shown as `[id]`. */
  id: number;
  /** Name of the source document. */
  fileName: string;
  /** Position of the passage within its document. */
  position: number;
  /** Relevance score; rendered as a percentage by multiplying by 100. */
  score: number;
  /** Text of the passage. */
  content: string;
}
|
||||
|
||||
/**
 * Produces a short preview of a passage for use as citation context.
 *
 * Whitespace runs are collapsed to single spaces. Text longer than
 * `maxLength` is cut at the last sentence-ending mark found in the second
 * half of the first `maxLength` characters; if there is none, it is cut at
 * `maxLength` and `...` is appended.
 *
 * @param text Full text.
 * @param maxLength Maximum preview length in characters (default 100).
 * @returns The preview, or an empty string for empty input.
 */
function extractContextPreview(text: string, maxLength: number = 100): string {
  if (!text) return '';

  // Collapse redundant whitespace.
  const cleaned = text.replace(/\s+/g, ' ').trim();

  // Short enough: return unchanged.
  if (cleaned.length <= maxLength) {
    return cleaned;
  }

  // Sentence terminators: ideographic full stop, full-width ! and ?, and
  // their ASCII counterparts. Written as escapes so the full-width forms
  // cannot be silently folded into the ASCII ones.
  const sentenceEndings = ['\u3002', '\uFF01', '\uFF1F', '.', '!', '?'];

  const truncated = cleaned.substring(0, maxLength);
  const lastPunctuation = Math.max(
    ...sentenceEndings.map(mark => truncated.lastIndexOf(mark))
  );

  // Only cut at punctuation if that keeps more than half of the budget.
  if (lastPunctuation > maxLength * 0.5) {
    return truncated.substring(0, lastPunctuation + 1);
  }

  return truncated + '...';
}
|
||||
|
||||
/**
 * Renders the reference list that is appended after an answer.
 *
 * Each entry carries an HTML `<span id="citation-detail-N">` anchor so the
 * citation number can be jumped to, followed by the file name, passage
 * position, relevance percentage and a 100-character preview.
 *
 * NOTE(review): `fileName` and the preview are interpolated without HTML
 * escaping — confirm the renderer sanitises this output.
 *
 * @param citations Citations to list.
 * @returns The formatted block, or an empty string when there are none.
 */
function formatCitations(citations: Citation[]): string {
  if (citations.length === 0) return '';

  const heading = '\n\n---\n\n📚 **参考文献**\n\n';

  const entries = citations.map((entry) => {
    const relevance = (entry.score * 100).toFixed(0);
    const snippet = extractContextPreview(entry.content, 100);

    const titleLine = `<span id="citation-detail-${entry.id}">[${entry.id}]</span> 📄 **${entry.fileName}** - 第${entry.position}段 (相关度${relevance}%)\n`;
    const quoteLine = ` "${snippet}"\n\n`;
    return titleLine + quoteLine;
  });

  return heading + entries.join('');
}
|
||||
|
||||
interface CreateConversationData {
|
||||
userId: string;
|
||||
projectId: string;
|
||||
@@ -187,7 +256,7 @@ export class ConversationService {
|
||||
} else {
|
||||
// 后续消息:只发送用户输入和知识库上下文(如果有)
|
||||
if (knowledgeBaseContext) {
|
||||
userPromptContent = `${userInput}\n\n## 参考文献(来自知识库)\n${knowledgeBaseContext}`;
|
||||
userPromptContent = `${userInput}\n\n## 参考文献(来自知识库)\n\n**重要提示**:下面提供的文献片段已经用[来源N]进行了标记。请在回答中引用具体来源时使用对应的编号,如"根据[来源1]..."或"研究表明[来源3][来源5]..."。系统会在你回答结束后自动显示完整的引用清单。\n\n${knowledgeBaseContext}`;
|
||||
console.log(`📝 [assembleContext] 后续消息+知识库,总长度: ${userPromptContent.length}`);
|
||||
console.log(`📋 [assembleContext] userPromptContent预览:\n${userPromptContent.substring(0, 300)}...`);
|
||||
} else {
|
||||
@@ -233,6 +302,9 @@ export class ConversationService {
|
||||
|
||||
// 获取知识库上下文(如果有@知识库)
|
||||
let knowledgeBaseContext = '';
|
||||
const allCitations: Citation[] = []; // 存储所有引用信息
|
||||
let citationCounter = 1; // 全局引用计数器
|
||||
|
||||
if (knowledgeBaseIds && knowledgeBaseIds.length > 0) {
|
||||
const knowledgeResults: string[] = [];
|
||||
|
||||
@@ -243,7 +315,7 @@ export class ConversationService {
|
||||
userId,
|
||||
kbId,
|
||||
content,
|
||||
3 // 每个知识库返回3个最相关的段落
|
||||
15 // Phase 1优化:从3增加到15个最相关的段落
|
||||
);
|
||||
|
||||
// 格式化检索结果
|
||||
@@ -253,15 +325,27 @@ export class ConversationService {
|
||||
select: { name: true },
|
||||
});
|
||||
|
||||
knowledgeResults.push(
|
||||
`【知识库:${kbInfo?.name || '未命名'}】\n` +
|
||||
// 优化格式:使用[来源N]标记,便于AI引用
|
||||
const formattedResult = `【知识库:${kbInfo?.name || '未命名'}】\n` +
|
||||
searchResult.records
|
||||
.map((record: any, index: number) => {
|
||||
.map((record: any) => {
|
||||
const citationId = citationCounter++;
|
||||
const score = (record.score * 100).toFixed(1);
|
||||
return `${index + 1}. [相关度${score}%] ${record.segment.content}`;
|
||||
|
||||
// 保存引用信息
|
||||
allCitations.push({
|
||||
id: citationId,
|
||||
fileName: record.segment?.document?.name || record.document_name || '未知文档',
|
||||
position: record.segment?.position || record.segment_position || 0,
|
||||
score: record.score,
|
||||
content: record.segment?.content || record.content || '',
|
||||
});
|
||||
|
||||
return `[来源${citationId}] [相关度${score}%]\n${record.segment?.content || record.content}`;
|
||||
})
|
||||
.join('\n\n')
|
||||
);
|
||||
.join('\n\n');
|
||||
|
||||
knowledgeResults.push(formattedResult);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`Failed to search knowledge base ${kbId}:`, error);
|
||||
@@ -297,6 +381,13 @@ export class ConversationService {
|
||||
topP: modelConfig?.topP,
|
||||
});
|
||||
|
||||
// AI回答完毕后,追加引用清单
|
||||
let finalContent = response.content;
|
||||
if (allCitations.length > 0) {
|
||||
const citationsText = formatCitations(allCitations);
|
||||
finalContent += citationsText;
|
||||
}
|
||||
|
||||
// 保存用户消息
|
||||
const userMessage = await prisma.message.create({
|
||||
data: {
|
||||
@@ -314,7 +405,7 @@ export class ConversationService {
|
||||
data: {
|
||||
conversationId,
|
||||
role: 'assistant',
|
||||
content: response.content,
|
||||
content: finalContent,
|
||||
model: response.model,
|
||||
tokens: response.usage?.totalTokens,
|
||||
metadata: {
|
||||
@@ -352,6 +443,9 @@ export class ConversationService {
|
||||
// 获取知识库上下文(如果有@知识库)
|
||||
console.log('📚 [sendMessageStream] 开始处理知识库', { knowledgeBaseIds });
|
||||
let knowledgeBaseContext = '';
|
||||
const allCitations: Citation[] = []; // 存储所有引用信息
|
||||
let citationCounter = 1; // 全局引用计数器
|
||||
|
||||
if (knowledgeBaseIds && knowledgeBaseIds.length > 0) {
|
||||
const knowledgeResults: string[] = [];
|
||||
|
||||
@@ -363,7 +457,7 @@ export class ConversationService {
|
||||
userId,
|
||||
kbId,
|
||||
content,
|
||||
3 // 每个知识库返回3个最相关的段落
|
||||
15 // Phase 1优化:从3增加到15个最相关的段落
|
||||
);
|
||||
|
||||
console.log(`✅ [sendMessageStream] 检索结果`, {
|
||||
@@ -378,11 +472,23 @@ export class ConversationService {
|
||||
select: { name: true },
|
||||
});
|
||||
|
||||
// 优化格式:使用[来源N]标记,便于AI引用
|
||||
const formattedResult = `【知识库:${kbInfo?.name || '未命名'}】\n` +
|
||||
searchResult.records
|
||||
.map((record: any, index: number) => {
|
||||
.map((record: any) => {
|
||||
const citationId = citationCounter++;
|
||||
const score = (record.score * 100).toFixed(1);
|
||||
return `${index + 1}. [相关度${score}%] ${record.segment.content}`;
|
||||
|
||||
// 保存引用信息
|
||||
allCitations.push({
|
||||
id: citationId,
|
||||
fileName: record.segment?.document?.name || record.document_name || '未知文档',
|
||||
position: record.segment?.position || record.segment_position || 0,
|
||||
score: record.score,
|
||||
content: record.segment?.content || record.content || '',
|
||||
});
|
||||
|
||||
return `[来源${citationId}] [相关度${score}%]\n${record.segment?.content || record.content}`;
|
||||
})
|
||||
.join('\n\n');
|
||||
|
||||
@@ -400,7 +506,7 @@ export class ConversationService {
|
||||
if (knowledgeResults.length > 0) {
|
||||
knowledgeBaseContext = knowledgeResults.join('\n\n---\n\n');
|
||||
console.log(`💾 [sendMessageStream] 知识库上下文总长度: ${knowledgeBaseContext.length} 字符`);
|
||||
console.log(`📋 [sendMessageStream] 知识库上下文预览:\n${knowledgeBaseContext.substring(0, 500)}...`);
|
||||
console.log(`📚 [sendMessageStream] 收集到 ${allCitations.length} 个引用`);
|
||||
} else {
|
||||
console.warn('⚠️ [sendMessageStream] 没有构建任何知识库上下文');
|
||||
}
|
||||
@@ -455,6 +561,19 @@ export class ConversationService {
|
||||
yield chunk;
|
||||
}
|
||||
|
||||
// AI回答完毕后,追加引用清单
|
||||
if (allCitations.length > 0) {
|
||||
console.log(`📚 [sendMessageStream] 追加 ${allCitations.length} 个引用清单`);
|
||||
const citationsText = formatCitations(allCitations);
|
||||
fullContent += citationsText;
|
||||
|
||||
// 将引用清单也流式输出
|
||||
yield {
|
||||
content: citationsText,
|
||||
done: false,
|
||||
};
|
||||
}
|
||||
|
||||
// 流式输出完成后,保存助手回复
|
||||
await prisma.message.create({
|
||||
data: {
|
||||
@@ -1,5 +1,6 @@
|
||||
import { prisma } from '../config/database.js';
|
||||
import { difyClient } from '../clients/DifyClient.js';
|
||||
import { prisma } from '../../config/database.js';
|
||||
import { difyClient } from '../../common/rag/DifyClient.js';
|
||||
import { extractionClient } from '../../common/document/ExtractionClient.js';
|
||||
|
||||
/**
|
||||
* 文档服务
|
||||
@@ -54,32 +55,64 @@ export async function uploadDocument(
|
||||
});
|
||||
|
||||
try {
|
||||
// 4. 上传到Dify
|
||||
// 4. Phase 2: 调用提取服务提取文本内容
|
||||
let extractionResult;
|
||||
let extractedText = '';
|
||||
let extractionMethod = '';
|
||||
let extractionQuality: number | null = null;
|
||||
let charCount: number | null = null;
|
||||
let detectedLanguage: string | null = null;
|
||||
|
||||
try {
|
||||
console.log(`[Phase2] 开始提取文档: ${filename}`);
|
||||
extractionResult = await extractionClient.extractDocument(file, filename);
|
||||
|
||||
if (extractionResult.success) {
|
||||
extractedText = extractionResult.text;
|
||||
extractionMethod = extractionResult.method;
|
||||
extractionQuality = extractionResult.quality || null;
|
||||
charCount = extractionResult.metadata?.char_count || null;
|
||||
detectedLanguage = extractionResult.language || null;
|
||||
|
||||
console.log(`[Phase2] 提取成功: method=${extractionMethod}, chars=${charCount}, language=${detectedLanguage}`);
|
||||
}
|
||||
} catch (extractionError) {
|
||||
console.error('[Phase2] 文档提取失败,但继续上传到Dify:', extractionError);
|
||||
// 提取失败不影响Dify上传,但记录错误
|
||||
}
|
||||
|
||||
// 5. 上传到Dify
|
||||
const difyResult = await difyClient.uploadDocumentDirectly(
|
||||
knowledgeBase.difyDatasetId,
|
||||
file,
|
||||
filename
|
||||
);
|
||||
|
||||
// 5. 更新文档记录(更新difyDocumentId和状态)
|
||||
// 6. 更新文档记录(更新difyDocumentId、状态和Phase 2字段)
|
||||
const updatedDocument = await prisma.document.update({
|
||||
where: { id: document.id },
|
||||
data: {
|
||||
difyDocumentId: difyResult.document.id,
|
||||
status: difyResult.document.indexing_status,
|
||||
progress: 50,
|
||||
// Phase 2新增字段
|
||||
extractedText: extractedText || null,
|
||||
extractionMethod: extractionMethod || null,
|
||||
extractionQuality: extractionQuality,
|
||||
charCount: charCount,
|
||||
language: detectedLanguage,
|
||||
},
|
||||
});
|
||||
|
||||
// 6. 启动后台轮询任务,等待处理完成
|
||||
// 7. 启动后台轮询任务,等待处理完成
|
||||
pollDocumentStatus(userId, kbId, document.id, difyResult.document.id).catch(error => {
|
||||
console.error('Failed to poll document status:', error);
|
||||
});
|
||||
|
||||
// 7. 更新知识库统计
|
||||
// 8. 更新知识库统计
|
||||
await updateKnowledgeBaseStats(kbId);
|
||||
|
||||
// 8. 转换BigInt为Number
|
||||
// 9. 转换BigInt为Number
|
||||
return {
|
||||
...updatedDocument,
|
||||
fileSizeBytes: Number(updatedDocument.fileSizeBytes),
|
||||
@@ -1,5 +1,6 @@
|
||||
import { prisma } from '../config/database.js';
|
||||
import { difyClient } from '../clients/DifyClient.js';
|
||||
import { prisma } from '../../config/database.js';
|
||||
import { difyClient } from '../../common/rag/DifyClient.js';
|
||||
import { calculateDocumentTokens, selectDocumentsForFullText, TOKEN_LIMITS } from './tokenService.js';
|
||||
|
||||
/**
|
||||
* 知识库服务
|
||||
@@ -194,7 +195,7 @@ export async function searchKnowledgeBase(
|
||||
userId: string,
|
||||
kbId: string,
|
||||
query: string,
|
||||
topK: number = 3
|
||||
topK: number = 15 // Phase 1优化:默认从3增加到15
|
||||
) {
|
||||
console.log('🔍 [searchKnowledgeBase] 开始检索', { kbId, query, topK });
|
||||
|
||||
@@ -288,3 +289,76 @@ export async function getKnowledgeBaseStats(userId: string, kbId: string) {
|
||||
return stats;
|
||||
}
|
||||
|
||||
/**
 * Chooses which documents of a knowledge base fit into full-text reading
 * mode under the file-count and token limits.
 *
 * Only documents with status `completed` are considered, newest upload
 * first. Each selected/excluded entry returned by the token service is
 * merged with the stored document fields (the stored fields win on key
 * clashes).
 *
 * NOTE(review): `fileSizeBytes` is passed through as stored; other code in
 * this project converts it from BigInt with `Number(...)` before returning
 * — confirm callers of this function handle that.
 *
 * @param userId Owner of the knowledge base.
 * @param kbId Knowledge base ID.
 * @param maxFiles Optional file limit; falsy values (including 0) fall back
 *   to `TOKEN_LIMITS.MAX_FILES`.
 * @param maxTokens Optional token limit; falsy values (including 0) fall
 *   back to `TOKEN_LIMITS.MAX_TOTAL_TOKENS`.
 * @returns The effective limits, selection statistics and both document lists.
 * @throws Error when the knowledge base does not exist or is not owned by
 *   `userId`.
 */
export async function getDocumentSelection(
  userId: string,
  kbId: string,
  maxFiles?: number,
  maxTokens?: number
) {
  // 1. Ownership check and document load in one query.
  const knowledgeBase = await prisma.knowledgeBase.findFirst({
    where: { id: kbId, userId },
    include: {
      documents: {
        where: {
          status: 'completed', // only fully processed documents
        },
        select: {
          id: true,
          filename: true,
          extractedText: true,
          charCount: true,
          extractionMethod: true,
          tokensCount: true,
          fileSizeBytes: true,
        },
        orderBy: { uploadedAt: 'desc' },
      },
    },
  });

  if (!knowledgeBase) {
    throw new Error('Knowledge base not found or access denied');
  }

  // Resolve the limits once; `||` is kept so 0 still means "use the default".
  const fileLimit = maxFiles || TOKEN_LIMITS.MAX_FILES;
  const tokenLimit = maxTokens || TOKEN_LIMITS.MAX_TOTAL_TOKENS;

  // 2. Token count per document.
  const documentTokens = calculateDocumentTokens(knowledgeBase.documents);

  // 3. Pick documents within the limits.
  const selection = selectDocumentsForFullText(documentTokens, fileLimit, tokenLimit);

  // Index the stored documents by ID so each lookup below is O(1) instead
  // of a linear scan; the first occurrence wins, as with `Array.find`.
  const documentsById = new Map<string, (typeof knowledgeBase.documents)[number]>();
  for (const stored of knowledgeBase.documents) {
    if (!documentsById.has(stored.id)) {
      documentsById.set(stored.id, stored);
    }
  }

  // 4. Assemble the response.
  return {
    knowledgeBaseId: kbId,
    knowledgeBaseName: knowledgeBase.name,
    limits: {
      maxFiles: fileLimit,
      maxTokens: tokenLimit,
    },
    selection: {
      selectedCount: selection.totalFiles,
      selectedTokens: selection.totalTokens,
      excludedCount: selection.excludedDocuments.length,
      availableTokens: selection.availableTokens,
      reason: selection.reason,
    },
    selectedDocuments: selection.selectedDocuments.map(doc => ({
      ...doc,
      // Merge in the stored document fields.
      ...documentsById.get(doc.documentId),
    })),
    excludedDocuments: selection.excludedDocuments.map(doc => ({
      ...doc,
      // Merge in the stored document fields.
      ...documentsById.get(doc.documentId),
    })),
  };
}
|
||||
@@ -1,4 +1,4 @@
|
||||
import { prisma } from '../config/database.js';
|
||||
import { prisma } from '../../config/database.js';
|
||||
|
||||
export interface CreateProjectDTO {
|
||||
name: string;
|
||||
452
backend/src/legacy/services/reviewService.ts
Normal file
452
backend/src/legacy/services/reviewService.ts
Normal file
@@ -0,0 +1,452 @@
|
||||
import { prisma } from '../../config/database.js';
|
||||
import { extractionClient } from '../../common/document/ExtractionClient.js';
|
||||
import { LLMFactory } from '../../common/llm/adapters/LLMFactory.js';
|
||||
import { ModelType } from '../../common/llm/adapters/types.js';
|
||||
import fs from 'fs/promises';
|
||||
import path from 'path';
|
||||
import { fileURLToPath } from 'url';
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = path.dirname(__filename);
|
||||
|
||||
/**
|
||||
* 稿件审查服务
|
||||
*/
|
||||
|
||||
// ==================== 类型定义 ====================
|
||||
|
||||
/**
 * Assessment of a single editorial criterion.
 */
export interface EditorialItem {
  /** Name of the criterion that was checked. */
  criterion: string;
  status: 'pass' | 'warning' | 'fail';
  score: number;
  /** Problems found for this criterion. */
  issues: string[];
  /** Suggested improvements for this criterion. */
  suggestions: string[];
}
|
||||
|
||||
/**
 * Result of the editorial-standards assessment as returned by the LLM.
 */
export interface EditorialReview {
  /** Overall score; contributes 40% of the task's combined score. */
  overall_score: number;
  summary: string;
  /** Per-criterion assessments. */
  items: EditorialItem[];
}
|
||||
|
||||
/**
 * A single methodological problem reported for a manuscript.
 */
export interface MethodologyIssue {
  type: string;
  severity: 'major' | 'minor';
  description: string;
  /** Where in the manuscript the issue was found. */
  location: string;
  suggestion: string;
}
|
||||
|
||||
/**
 * Methodology assessment of one part of the manuscript.
 */
export interface MethodologyPart {
  /** Name of the assessed part. */
  part: string;
  score: number;
  /** Issues found in this part. */
  issues: MethodologyIssue[];
}
|
||||
|
||||
/**
 * Result of the methodology assessment as returned by the LLM.
 */
export interface MethodologyReview {
  /** Overall score; contributes 60% of the task's combined score. */
  overall_score: number;
  summary: string;
  /** Per-part assessments. */
  parts: MethodologyPart[];
}
|
||||
|
||||
// ==================== 主要功能函数 ====================
|
||||
|
||||
/**
 * Entry point for reviewing a manuscript.
 *
 * Creates a `reviewTask` row in `pending` state and starts the actual
 * review in the background; the returned task therefore reflects the state
 * at creation time, not the review outcome. Failures of the background
 * work are logged here and recorded on the task by `processReviewTask`.
 *
 * @param file File contents.
 * @param filename File name.
 * @param userId ID of the submitting user.
 * @param modelType Model to use (default `'deepseek-v3'`).
 * @returns The newly created review task record.
 */
export async function reviewManuscript(
  file: Buffer,
  filename: string,
  userId: string,
  modelType: ModelType = 'deepseek-v3'
) {
  const startTime = Date.now();

  // 1. Create the task record.
  const task = await prisma.reviewTask.create({
    data: {
      userId,
      fileName: filename,
      fileSize: file.length,
      extractedText: '', // filled in once extraction has run
      status: 'pending',
      modelUsed: modelType,
      startedAt: new Date(),
    },
  });

  // 2. Fire-and-forget: an async call cannot throw synchronously, so the
  //    `.catch` below is the only error path needed here.
  void processReviewTask(task.id, file, filename, userId, modelType, startTime).catch(error => {
    console.error(`[ReviewService] Task ${task.id} failed:`, error);
  });

  return task;
}
|
||||
|
||||
/**
 * Background worker for one review task.
 *
 * Moves the task through `extracting` → `reviewing_editorial` →
 * `reviewing_methodology` → `completed`, storing the extracted text and
 * both review results along the way. The combined score is
 * 40% editorial + 60% methodology. Any error marks the task `failed` with
 * the error message.
 *
 * @param taskId ID of the task to process.
 * @param file File contents.
 * @param filename File name.
 * @param userId ID of the submitting user (not used by this function).
 * @param modelType Model used for both assessments.
 * @param startTime Epoch milliseconds at which the review was requested.
 */
async function processReviewTask(
  taskId: string,
  file: Buffer,
  filename: string,
  userId: string,
  modelType: ModelType,
  startTime: number
) {
  const where = { id: taskId };

  try {
    // Step 1: flag the task as extracting.
    await prisma.reviewTask.update({ where, data: { status: 'extracting' } });

    // Step 2: pull the text out of the uploaded file.
    console.log(`[ReviewService] 开始提取文档: ${filename}`);
    const extraction = await extractionClient.extractDocument(file, filename);

    if (!(extraction.success && extraction.text)) {
      throw new Error('文档提取失败或内容为空');
    }

    const extractedText = extraction.text;
    // Prefer the extractor's character count; fall back to the text length.
    const wordCount = extraction.metadata?.char_count || extractedText.length;

    console.log(`[ReviewService] 提取成功: ${wordCount} 字符`);

    await prisma.reviewTask.update({
      where,
      data: { extractedText, wordCount, status: 'reviewing_editorial' },
    });

    // Step 3: editorial-standards assessment.
    console.log(`[ReviewService] 开始稿约规范性评估...`);
    const editorial = await reviewEditorialStandards(extractedText, modelType);

    await prisma.reviewTask.update({
      where,
      data: { editorialReview: editorial as any, status: 'reviewing_methodology' },
    });

    // Step 4: methodology assessment.
    console.log(`[ReviewService] 开始方法学评估...`);
    const methodology = await reviewMethodology(extractedText, modelType);

    // Step 5: weighted total (editorial 40%, methodology 60%).
    const overallScore = editorial.overall_score * 0.4 + methodology.overall_score * 0.6;

    // Step 6: close the task.
    const durationSeconds = Math.floor((Date.now() - startTime) / 1000);

    await prisma.reviewTask.update({
      where,
      data: {
        methodologyReview: methodology as any,
        overallScore,
        status: 'completed',
        completedAt: new Date(),
        durationSeconds,
      },
    });

    console.log(`[ReviewService] 任务完成: ${taskId}, 总分: ${overallScore.toFixed(1)}, 耗时: ${durationSeconds}s`);
  } catch (error) {
    console.error(`[ReviewService] 任务处理失败:`, error);

    // Record the failure on the task.
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    await prisma.reviewTask.update({
      where,
      data: { status: 'failed', errorMessage },
    });
  }
}
|
||||
|
||||
/**
 * Assesses a manuscript against the editorial standards.
 *
 * Loads the system prompt from `prompts/review_editorial_system.txt`, asks
 * the selected model for an assessment and parses the JSON reply.
 *
 * @param text Manuscript text.
 * @param modelType Model to use (default `'deepseek-v3'`).
 * @returns The parsed assessment.
 * @throws Error prefixed with `稿约规范性评估失败` when the prompt cannot be
 *   read, the model call fails, or the reply lacks a numeric
 *   `overall_score` or an `items` array.
 */
export async function reviewEditorialStandards(
  text: string,
  modelType: ModelType = 'deepseek-v3'
): Promise<EditorialReview> {
  try {
    // System prompt lives next to the other prompt files.
    const systemPrompt = await fs.readFile(
      path.join(__dirname, '../../../prompts/review_editorial_system.txt'),
      'utf-8'
    );

    const messages = [
      { role: 'system' as const, content: systemPrompt },
      { role: 'user' as const, content: `请对以下稿件进行稿约规范性评估:\n\n${text}` },
    ];

    console.log(`[ReviewService] 开始调用 ${modelType} 进行稿约规范性评估...`);
    const reply = await LLMFactory.getAdapter(modelType).chat(messages, {
      temperature: 0.3, // low temperature for steadier assessments
      maxTokens: 8000, // generous limit so the output is not truncated
    });
    console.log(`[ReviewService] ${modelType} 稿约规范性评估完成,响应长度: ${reply.content.length}`);

    const review = parseJSONFromLLMResponse<EditorialReview>(reply.content);

    // The reply must at least carry a numeric score and an item list.
    const wellFormed =
      Boolean(review) &&
      typeof review.overall_score === 'number' &&
      Array.isArray(review.items);
    if (!wellFormed) {
      throw new Error('LLM返回的数据格式不正确');
    }

    return review;
  } catch (error) {
    console.error('[ReviewService] 稿约规范性评估失败:', error);
    if (error instanceof Error) {
      const { message, stack } = error;
      console.error('[ReviewService] 错误详情:', { message, stack });
    }
    const reason = error instanceof Error ? error.message : 'Unknown error';
    throw new Error(`稿约规范性评估失败: ${reason}`);
  }
}
|
||||
|
||||
/**
 * Assesses the methodology of a manuscript.
 *
 * Loads the system prompt from `prompts/review_methodology_system.txt`,
 * asks the selected model for an assessment and parses the JSON reply.
 *
 * @param text Manuscript text.
 * @param modelType Model to use (default `'deepseek-v3'`).
 * @returns The parsed assessment.
 * @throws Error prefixed with `方法学评估失败` when the prompt cannot be read,
 *   the model call fails, or the reply lacks a numeric `overall_score` or a
 *   `parts` array.
 */
export async function reviewMethodology(
  text: string,
  modelType: ModelType = 'deepseek-v3'
): Promise<MethodologyReview> {
  try {
    // System prompt lives next to the other prompt files.
    const systemPrompt = await fs.readFile(
      path.join(__dirname, '../../../prompts/review_methodology_system.txt'),
      'utf-8'
    );

    const messages = [
      { role: 'system' as const, content: systemPrompt },
      { role: 'user' as const, content: `请对以下稿件进行方法学评估:\n\n${text}` },
    ];

    console.log(`[ReviewService] 开始调用 ${modelType} 进行方法学评估...`);
    const reply = await LLMFactory.getAdapter(modelType).chat(messages, {
      temperature: 0.3,
      maxTokens: 8000, // generous limit so the output is not truncated
    });
    console.log(`[ReviewService] ${modelType} 方法学评估完成,响应长度: ${reply.content.length}`);

    const review = parseJSONFromLLMResponse<MethodologyReview>(reply.content);

    // The reply must at least carry a numeric score and a list of parts.
    const wellFormed =
      Boolean(review) &&
      typeof review.overall_score === 'number' &&
      Array.isArray(review.parts);
    if (!wellFormed) {
      throw new Error('LLM返回的数据格式不正确');
    }

    return review;
  } catch (error) {
    console.error('[ReviewService] 方法学评估失败:', error);
    if (error instanceof Error) {
      const { message, stack } = error;
      console.error('[ReviewService] 错误详情:', { message, stack });
    }
    const reason = error instanceof Error ? error.message : 'Unknown error';
    throw new Error(`方法学评估失败: ${reason}`);
  }
}
|
||||
|
||||
/**
 * Extract a JSON value from raw LLM output.
 *
 * Candidates are tried in this order and the first one that parses wins:
 *   1. the whole response;
 *   2. the body of the first ```json fenced block;
 *   3. the span from the first `{` to the last `}`;
 *   4. the span from the first `[` to the last `]`;
 *   5. the first balanced `{...}` / `[...]` span, scanning left to right.
 *
 * Step 5 is a last resort for replies where prose after the payload contains
 * stray braces or brackets, which makes the greedy spans of steps 3 and 4
 * unparseable.
 *
 * The parsed value is cast to `T` without validation; callers must check the
 * shape themselves.
 *
 * @param content Raw text returned by the model.
 * @returns The first successfully parsed JSON value.
 * @throws Error('无法从LLM响应中解析JSON') when no candidate parses.
 */
function parseJSONFromLLMResponse<T>(content: string): T {
  // Evaluated lazily so later (more expensive) strategies only run on failure.
  const extractors: Array<(raw: string) => string | undefined> = [
    (raw) => raw,
    (raw) => raw.match(/```json\s*\n?([\s\S]*?)\n?```/)?.[1]?.trim(),
    (raw) => raw.match(/\{[\s\S]*\}/)?.[0],
    (raw) => raw.match(/\[[\s\S]*\]/)?.[0],
  ];

  for (const extract of extractors) {
    const candidate = extract(content);
    if (candidate === undefined) {
      continue;
    }
    const attempt = tryParseJSONCandidate(candidate);
    if (attempt.ok) {
      return attempt.value as T;
    }
  }

  // Last resort: look for a self-contained object/array anywhere in the text.
  for (let start = 0; start < content.length; start++) {
    const opener = content[start];
    if (opener !== '{' && opener !== '[') {
      continue;
    }
    const end = findBalancedJSONEnd(content, start);
    if (end === -1) {
      continue;
    }
    const attempt = tryParseJSONCandidate(content.slice(start, end + 1));
    if (attempt.ok) {
      return attempt.value as T;
    }
  }

  throw new Error('无法从LLM响应中解析JSON');
}

/** Parse `text` as JSON, reporting failure as a value instead of throwing. */
function tryParseJSONCandidate(text: string): { ok: true; value: unknown } | { ok: false } {
  try {
    return { ok: true, value: JSON.parse(text) };
  } catch {
    return { ok: false };
  }
}

/**
 * Return the index of the bracket that closes the one at `start`, ignoring
 * brackets inside double-quoted strings, or -1 if the span is unbalanced.
 */
function findBalancedJSONEnd(text: string, start: number): number {
  const expectedClosers: string[] = [];
  let inString = false;
  let escaped = false;

  for (let i = start; i < text.length; i++) {
    const ch = text[i];

    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }

    if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      expectedClosers.push('}');
    } else if (ch === '[') {
      expectedClosers.push(']');
    } else if (ch === '}' || ch === ']') {
      if (expectedClosers.pop() !== ch) {
        return -1; // mismatched closer: this span cannot be valid JSON
      }
      if (expectedClosers.length === 0) {
        return i;
      }
    }
  }

  return -1;
}
|
||||
|
||||
// ==================== 任务查询与管理 ====================
|
||||
|
||||
/**
 * Fetch a single review task that belongs to the given user.
 *
 * @param userId Owner the task must belong to.
 * @param taskId Id of the task to load.
 * @returns The matching task record.
 * @throws Error('Task not found or access denied') when no task matches both
 *   the id and the owner.
 */
export async function getReviewTask(userId: string, taskId: string) {
  const ownedTask = await prisma.reviewTask.findFirst({
    where: { id: taskId, userId },
  });

  if (ownedTask) {
    return ownedTask;
  }

  throw new Error('Task not found or access denied');
}
|
||||
|
||||
/**
 * List a user's review tasks, newest first, one page at a time.
 *
 * `page` and `limit` are sanitised before use: a value that is not a finite
 * number >= 1 falls back to the declared default (1 and 20 respectively), and
 * fractional values are floored. This avoids a negative `skip`, a `take` of
 * zero and a division by zero in `totalPages`.
 *
 * @param userId Owner whose tasks are listed.
 * @param page 1-based page number (default 1).
 * @param limit Page size (default 20).
 * @returns The page of task summaries plus pagination metadata; `page` and
 *   `limit` in the metadata are the sanitised values actually used.
 */
export async function getReviewTasks(userId: string, page: number = 1, limit: number = 20) {
  const safePage = Number.isFinite(page) && page >= 1 ? Math.floor(page) : 1;
  const safeLimit = Number.isFinite(limit) && limit >= 1 ? Math.floor(limit) : 20;
  const skip = (safePage - 1) * safeLimit;

  // The page query and the total count are independent, so run them together.
  const [tasks, total] = await Promise.all([
    prisma.reviewTask.findMany({
      where: { userId },
      orderBy: { createdAt: 'desc' },
      skip,
      take: safeLimit,
      // Summary columns only; the full review payloads are not selected here.
      select: {
        id: true,
        fileName: true,
        fileSize: true,
        status: true,
        overallScore: true,
        modelUsed: true,
        createdAt: true,
        completedAt: true,
        durationSeconds: true,
        wordCount: true,
      },
    }),
    prisma.reviewTask.count({
      where: { userId },
    }),
  ]);

  return {
    tasks,
    pagination: {
      page: safePage,
      limit: safeLimit,
      total,
      totalPages: Math.ceil(total / safeLimit),
    },
  };
}
|
||||
|
||||
/**
 * Delete a review task after confirming it belongs to the given user.
 *
 * @param userId Owner the task must belong to.
 * @param taskId Id of the task to delete.
 * @returns `{ success: true }` once the record has been deleted.
 * @throws Error('Task not found or access denied') when no task matches both
 *   the id and the owner.
 */
export async function deleteReviewTask(userId: string, taskId: string) {
  // Ownership check: look the task up by id AND owner before touching it.
  const ownedTask = await prisma.reviewTask.findFirst({
    where: { id: taskId, userId },
  });

  const isMissing = !ownedTask;
  if (isMissing) {
    throw new Error('Task not found or access denied');
  }

  await prisma.reviewTask.delete({ where: { id: taskId } });

  return { success: true };
}
|
||||
|
||||
/**
 * Return the full evaluation report for a completed review task.
 *
 * @param userId Owner the task must belong to.
 * @param taskId Id of the task whose report is requested.
 * @returns Report fields copied from the task record.
 * @throws Error('Task not found or access denied') when no task matches both
 *   the id and the owner.
 * @throws Error('Report is not ready yet. Task status: …') when the task's
 *   status is anything other than `'completed'`.
 */
export async function getReviewReport(userId: string, taskId: string) {
  const ownedTask = await prisma.reviewTask.findFirst({
    where: { id: taskId, userId },
  });

  if (!ownedTask) {
    throw new Error('Task not found or access denied');
  }

  const isFinished = ownedTask.status === 'completed';
  if (!isFinished) {
    throw new Error('Report is not ready yet. Task status: ' + ownedTask.status);
  }

  const {
    id,
    fileName,
    wordCount,
    modelUsed,
    overallScore,
    editorialReview,
    methodologyReview,
    completedAt,
    durationSeconds,
  } = ownedTask;

  return {
    taskId: id,
    fileName,
    wordCount,
    modelUsed,
    overallScore,
    editorialReview,
    methodologyReview,
    completedAt,
    durationSeconds,
  };
}
|
||||
|
||||
232
backend/src/legacy/services/tokenService.ts
Normal file
232
backend/src/legacy/services/tokenService.ts
Normal file
@@ -0,0 +1,232 @@
|
||||
import { encoding_for_model, Tiktoken } from 'tiktoken';
|
||||
|
||||
/**
|
||||
* Token计数服务
|
||||
* 用于全文阅读模式的Token管理
|
||||
*/
|
||||
|
||||
// Limits applied when selecting documents for full-text reading mode.
export const TOKEN_LIMITS = {
  // At most 50 files per request.
  MAX_FILES: 50,
  // At most 980K document tokens (leaves 20K headroom in Qwen-Long's 1M context).
  MAX_TOTAL_TOKENS: 980_000,
  // Tokens reserved for the system prompt and the user's query.
  CONTEXT_RESERVE: 20_000,
};
|
||||
|
||||
// Encoder instance shared by every call in this module; created on first use.
let encoderCache: Tiktoken | null = null;

/**
 * Return the shared tiktoken encoder, creating it on the first call.
 *
 * The GPT-4 encoding is used as a stand-in for Qwen's tokenizer, so counts
 * are an approximation for Qwen models.
 */
function getEncoder(): Tiktoken {
  encoderCache ??= encoding_for_model('gpt-4');
  return encoderCache;
}
|
||||
|
||||
/**
 * Count the tokens in a piece of text.
 *
 * Empty or whitespace-only text counts as 0. If the encoder throws, the error
 * is logged and a character-based estimate is returned instead.
 *
 * @param text Text to measure.
 * @returns Number of tokens (exact from the encoder, or estimated on failure).
 */
export function countTokens(text: string): number {
  const isBlank = !text || text.trim().length === 0;
  if (isBlank) {
    return 0;
  }

  try {
    return getEncoder().encode(text).length;
  } catch (error) {
    console.error('[TokenService] Failed to count tokens:', error);

    // Fallback estimate: ~1.5 characters per token for Chinese characters,
    // ~4 characters per token for everything else.
    const chineseCount = text.match(/[\u4e00-\u9fff]/g)?.length ?? 0;
    const otherCount = text.length - chineseCount;
    return Math.ceil(chineseCount / 1.5 + otherCount / 4);
  }
}
|
||||
|
||||
/**
 * Count tokens for several texts at once.
 *
 * @param texts Texts to measure.
 * @returns Token counts in the same order as `texts`.
 */
export function countTokensBatch(texts: string[]): number[] {
  return texts.map((entry) => {
    return countTokens(entry);
  });
}
|
||||
|
||||
/**
 * Token information computed for one document.
 */
export interface DocumentTokenInfo {
  /** Id of the source document. */
  documentId: string;
  /** File name of the source document. */
  filename: string;
  /** Character count carried over from the document record. */
  charCount: number;
  /** Token count, either measured from the text or estimated. */
  estimatedTokens: number;
  /** How the text was extracted, when known. */
  extractionMethod?: string;
}
|
||||
|
||||
/**
 * Compute token information for a list of documents.
 *
 * For each document: when extracted text is available its tokens are counted
 * with `countTokens`; otherwise, when a character count is available, tokens
 * are estimated at 2.5 characters per token (mixed Chinese/English
 * assumption); otherwise the estimate is 0.
 *
 * @param documents Document records to measure.
 * @returns One `DocumentTokenInfo` per input document, in the same order.
 */
export function calculateDocumentTokens(
  documents: Array<{
    id: string;
    filename: string;
    extractedText?: string | null;
    charCount?: number | null;
    extractionMethod?: string | null;
  }>
): DocumentTokenInfo[] {
  return documents.map(({ id, filename, extractedText, charCount, extractionMethod }) => {
    const estimateTokens = (): number => {
      if (extractedText) {
        // Text is available: measure it.
        return countTokens(extractedText);
      }
      if (charCount) {
        // No text: estimate from the character count.
        return Math.ceil(charCount / 2.5);
      }
      return 0;
    };

    return {
      documentId: id,
      filename,
      charCount: charCount || 0,
      estimatedTokens: estimateTokens(),
      extractionMethod: extractionMethod || undefined,
    };
  });
}
|
||||
|
||||
/**
 * Outcome of choosing which documents fit within the file and token limits.
 */
export interface DocumentSelectionResult {
  /** Documents that were selected. */
  selectedDocuments: DocumentTokenInfo[];
  /** Sum of the estimated tokens of the selected documents. */
  totalTokens: number;
  /** Number of selected documents. */
  totalFiles: number;
  /** Documents that were left out. */
  excludedDocuments: DocumentTokenInfo[];
  /** Why documents were left out, or `'all_included'` when none were. */
  reason: 'all_included' | 'file_limit' | 'token_limit';
  /** Token budget still unused after the selection. */
  availableTokens: number;
}
|
||||
|
||||
/**
 * Choose which documents to include so the file and token limits are met.
 *
 * Documents are considered from smallest to largest token estimate; a
 * document is skipped when the file limit has been reached or when adding it
 * would exceed the token limit. The input array is not modified.
 *
 * @param documents Candidate documents with their token estimates.
 * @param maxFiles Maximum number of documents to select.
 * @param maxTokens Maximum total tokens across the selected documents.
 * @returns The selected and excluded documents, totals, and the reason for
 *   any exclusion.
 */
export function selectDocumentsForFullText(
  documents: DocumentTokenInfo[],
  maxFiles: number = TOKEN_LIMITS.MAX_FILES,
  maxTokens: number = TOKEN_LIMITS.MAX_TOTAL_TOKENS
): DocumentSelectionResult {
  // Smallest documents first, on a copy so the caller's array is untouched.
  const bySize = documents.slice().sort((left, right) => left.estimatedTokens - right.estimatedTokens);

  const selected: DocumentTokenInfo[] = [];
  const excluded: DocumentTokenInfo[] = [];
  let totalTokens = 0;

  for (const candidate of bySize) {
    const atFileLimit = selected.length >= maxFiles;
    const overTokenLimit = totalTokens + candidate.estimatedTokens > maxTokens;

    if (atFileLimit || overTokenLimit) {
      excluded.push(candidate);
    } else {
      selected.push(candidate);
      totalTokens += candidate.estimatedTokens;
    }
  }

  // Nothing excluded -> everything fit; otherwise report which limit applied.
  const reason: 'all_included' | 'file_limit' | 'token_limit' =
    excluded.length === 0
      ? 'all_included'
      : selected.length >= maxFiles
        ? 'file_limit'
        : 'token_limit';

  return {
    selectedDocuments: selected,
    totalTokens,
    totalFiles: selected.length,
    excludedDocuments: excluded,
    reason,
    availableTokens: maxTokens - totalTokens,
  };
}
|
||||
|
||||
/**
 * Estimate the tokens a query will need: the query itself, the optional
 * system prompt, and a fixed allowance of 2000 tokens for the response.
 *
 * @param query User query text.
 * @param systemPrompt Optional system prompt sent alongside the query.
 * @returns Estimated token total.
 */
export function estimateQueryTokens(query: string, systemPrompt?: string): number {
  // Assume the response takes at most 2000 tokens.
  const responseAllowance = 2000;

  const queryTokens = countTokens(query);
  const promptTokens = systemPrompt ? countTokens(systemPrompt) : 0;

  return queryTokens + promptTokens + responseAllowance;
}
|
||||
|
||||
/**
 * Check whether documents plus query fit within a token limit.
 *
 * @param documentsTokens Tokens used by the documents.
 * @param queryTokens Tokens used by the query.
 * @param maxTokens Limit to check against.
 * @returns Whether the total fits, the total, the limit used, and the
 *   remaining budget (negative when over the limit).
 */
export function checkTokenLimit(
  documentsTokens: number,
  queryTokens: number,
  maxTokens: number = TOKEN_LIMITS.MAX_TOTAL_TOKENS
): {
  withinLimit: boolean;
  totalTokens: number;
  maxTokens: number;
  remaining: number;
} {
  const used = documentsTokens + queryTokens;
  const left = maxTokens - used;

  return {
    withinLimit: left >= 0,
    totalTokens: used,
    maxTokens,
    remaining: left,
  };
}
|
||||
|
||||
/**
 * Free the cached encoder, if one exists, and clear the cache.
 * Calling it when no encoder is cached does nothing.
 */
export function cleanup() {
  if (!encoderCache) {
    return;
  }

  encoderCache.free();
  encoderCache = null;
}
|
||||
|
||||
// Free the encoder when the process ends.
// NOTE(review): the SIGINT handler calls process.exit() without an exit code
// and replaces Node's default Ctrl+C behaviour for the whole process — confirm
// this is intended for a module imported by the server.
if (typeof process !== 'undefined') {
  const cleanupThenExit = () => {
    cleanup();
    process.exit();
  };

  process.on('exit', cleanup);
  process.on('SIGINT', cleanupThenExit);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
152
backend/src/legacy/templates/clinicalResearch.ts
Normal file
152
backend/src/legacy/templates/clinicalResearch.ts
Normal file
@@ -0,0 +1,152 @@
|
||||
/**
|
||||
* Phase 3: 批处理模式 - 临床研究信息提取模板
|
||||
*
|
||||
* 提取临床研究的8个核心字段:
|
||||
* 1. 研究目的
|
||||
* 2. 研究设计
|
||||
* 3. 研究对象
|
||||
* 4. 样本量(text类型,保留原文描述)
|
||||
* 5. 干预组
|
||||
* 6. 对照组
|
||||
* 7. 结果及数据
|
||||
* 8. 牛津评级(提供详细标准)
|
||||
*/
|
||||
|
||||
/** One output field that a batch template asks the model to extract. */
export interface TemplateField {
  /** Key of the field in the model's JSON output. */
  key: string;
  /** Display label for the field. */
  label: string;
  /** Value kind of the field. */
  type: 'text' | 'longtext' | 'number';
  /** Optional explanation of what the field contains. */
  description?: string;
}
|
||||
|
||||
/** A preset template for batch information extraction. */
export interface BatchTemplate {
  /** Unique template id. */
  id: string;
  /** Display name of the template. */
  name: string;
  /** Short description of what the template extracts. */
  description: string;
  /** Fields the model is expected to return. */
  outputFields: TemplateField[];
  /** System prompt text for the template. */
  systemPrompt: string;
  /** User prompt text for the template. */
  userPrompt: string;
}
|
||||
|
||||
/**
 * Preset template that extracts eight core fields from a clinical research
 * paper: purpose, design, subjects, sample size, intervention group, control
 * group, results/data and Oxford evidence level.
 *
 * NOTE(review): the evidence-level rubric embedded in `userPrompt` does not
 * appear to match the published Oxford CEBM level definitions — confirm the
 * intended grading scheme with the domain owner.
 */
export const CLINICAL_RESEARCH_TEMPLATE: BatchTemplate = {
  id: 'clinical_research',
  name: '临床研究信息提取',
  description: '提取研究目的、设计、对象、样本量、干预、对照、结果、证据等级',

  outputFields: [
    { key: 'research_purpose', label: '研究目的', type: 'text', description: '研究想要解决的问题或验证的假设' },
    { key: 'research_design', label: '研究设计', type: 'text', description: '研究类型(RCT、队列研究等)' },
    { key: 'research_subjects', label: '研究对象', type: 'text', description: '纳入/排除标准、人群特征' },
    // Kept as text (not number) so the paper's original wording is preserved.
    { key: 'sample_size', label: '样本量', type: 'text', description: '实际纳入的受试者人数' },
    { key: 'intervention_group', label: '干预组', type: 'text', description: '实验组的干预措施' },
    { key: 'control_group', label: '对照组', type: 'text', description: '对照组的情况' },
    { key: 'results_data', label: '结果及数据', type: 'longtext', description: '主要结局指标的具体数据' },
    { key: 'oxford_level', label: '牛津评级', type: 'text', description: '证据等级(1a-5)' },
  ],

  systemPrompt: `你是一个专业的临床研究数据提取助手。
你的任务是从临床研究文献中提取结构化信息。
你的回答必须严格遵循JSON格式,不要有任何额外的文字说明。`,

  userPrompt: `请仔细阅读这篇临床研究文献,提取以下信息:

1. **研究目的**:本研究想要解决什么问题或验证什么假设?用1-2句话概括。

2. **研究设计**:研究类型,如随机对照试验(RCT)、队列研究、病例对照研究、横断面研究、系统评价/Meta分析等。

3. **研究对象**:描述纳入标准、排除标准、人群特征(年龄、性别、疾病状态等)。

4. **样本量**:实际纳入的受试者人数,保留原文描述(如"干预组156人,对照组152人,共308人")。

5. **干预组**:实验组接受的治疗或干预措施,包括药物名称、剂量、给药方式、疗程等。

6. **对照组**:对照组的情况,如安慰剂、标准治疗、空白对照等。

7. **结果及数据**:主要结局指标的具体数据、统计结果、P值、置信区间等。包括基线数据对比和终点数据对比。

8. **牛津评级**:根据研究设计判断证据等级,参考以下标准:
- **1a**:系统评价/Meta分析(多个RCT的汇总分析)
- **1b**:单个随机对照试验(RCT)
- **2a**:设计良好的对照研究(无随机化)
- **2b**:设计良好的准实验研究(队列研究、病例对照研究)
- **3a**:描述性研究(横断面研究、病例系列)
- **3b**:个案报告(单一病例)
- **4**:专家意见、共识声明
- **5**:基础研究(动物实验、体外研究)

请严格按照以下JSON格式输出,不要有任何额外说明或前言:
{
"research_purpose": "...",
"research_design": "...",
"research_subjects": "...",
"sample_size": "...",
"intervention_group": "...",
"control_group": "...",
"results_data": "...",
"oxford_level": "..."
}`,
};
|
||||
|
||||
// All preset templates, keyed by template id.
export const PRESET_TEMPLATES: Record<string, BatchTemplate> = Object.fromEntries(
  [CLINICAL_RESEARCH_TEMPLATE].map((template) => [template.id, template] as const)
);
|
||||
|
||||
/**
 * Look up a preset template by id.
 *
 * Only the registry's own keys are considered, so ids that happen to match
 * inherited object members (for example "constructor" or "toString") yield
 * `null` instead of a non-template value.
 *
 * @param templateId Id of the template to fetch.
 * @returns The template, or `null` when no preset has that id.
 */
export function getTemplate(templateId: string): BatchTemplate | null {
  if (!Object.prototype.hasOwnProperty.call(PRESET_TEMPLATES, templateId)) {
    return null;
  }
  return PRESET_TEMPLATES[templateId] ?? null;
}
|
||||
|
||||
/**
 * List every preset template.
 *
 * @returns The templates in the registry's key order.
 */
export function getAllTemplates(): BatchTemplate[] {
  return Object.entries(PRESET_TEMPLATES).map(([, template]) => template);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user