feat(pkb): Complete PKB module frontend migration with V3 design

Summary:
- Implement PKB Dashboard and Workspace pages based on V3 prototype
- Add single-layer header with integrated Tab navigation
- Implement 3 work modes: Full Text, Deep Read, Batch Processing
- Integrate Ant Design X Chat component for AI conversations
- Create BatchModeComplete with template selection and document processing
- Add compact work mode selector with dropdown design

Backend:
- Migrate PKB controllers and services to /modules/pkb structure
- Register v2 API routes at /api/v2/pkb/knowledge
- Maintain dual API routes for backward compatibility

Technical details:
- Use Zustand for state management
- Handle SSE streaming responses for AI chat
- Support document selection for Deep Read mode
- Implement batch processing with progress tracking

Known issues:
- Batch processing API integration pending
- Knowledge assets page navigation needs optimization

Status: Frontend functional, pending refinement
This commit is contained in:
2026-01-06 22:15:42 +08:00
parent b31255031e
commit 5a17d096a7
226 changed files with 14899 additions and 224 deletions

View File

@@ -301,6 +301,10 @@ export function getBatchItems<T>(

View File

@@ -12,6 +12,7 @@ import { batchRoutes } from './legacy/routes/batchRoutes.js';
import reviewRoutes from './legacy/routes/reviewRoutes.js';
import { aslRoutes } from './modules/asl/routes/index.js';
import { registerDCRoutes, initDCModule } from './modules/dc/index.js';
import pkbRoutes from './modules/pkb/routes/index.js';
import { registerHealthRoutes } from './common/health/index.js';
import { logger } from './common/logging/index.js';
import { registerTestRoutes } from './test-platform-api.js';
@@ -111,6 +112,13 @@ await fastify.register(batchRoutes, { prefix: '/api/v1' });
// 注册稿件审查路由
await fastify.register(reviewRoutes, { prefix: '/api/v1' });
// ============================================
// 【业务模块】PKB - 个人知识库(新架构 v2
// ============================================
await fastify.register(pkbRoutes, { prefix: '/api/v2/pkb' });
logger.info('✅ PKB个人知识库路由已注册v2新架构: /api/v2/pkb');
logger.info(' ⚠️ 旧版路由仍可用: /api/v1/knowledge, /api/v1/batch-tasks');
// ============================================
// 【业务模块】ASL - AI智能文献筛选
// ============================================

View File

@@ -337,6 +337,10 @@ runTests().catch((error) => {

View File

@@ -316,6 +316,10 @@ Content-Type: application/json

View File

@@ -252,6 +252,10 @@ export const conflictDetectionService = new ConflictDetectionService();

View File

@@ -202,6 +202,10 @@ curl -X POST http://localhost:3000/api/v1/dc/tool-c/test/execute \

View File

@@ -256,6 +256,10 @@ export const streamAIController = new StreamAIController();

View File

@@ -170,3 +170,7 @@ logger.info('[SessionMemory] 会话记忆管理器已启动', {

View File

@@ -104,3 +104,7 @@ async function checkTableStructure() {
checkTableStructure();

View File

@@ -91,3 +91,7 @@ checkProjectConfig().catch(console.error);

View File

@@ -73,3 +73,7 @@ main();

View File

@@ -0,0 +1,335 @@
/**
* 微信服务号回调控制器 - 明文模式Plain Text Mode
*
* 功能:
* 1. URL验证GET请求
* 2. 接收消息推送POST请求
* 3. 签名校验SHA1
* 4. 不进行消息加解密
*
* 官方文档:
* https://developers.weixin.qq.com/doc/offiaccount/Basic_Information/Access_Overview.html
*
* @author AI Assistant
* @date 2026-01-04
*/
import { FastifyRequest, FastifyReply } from 'fastify';
import crypto from 'crypto';
import { logger } from '../../../common/logging/index.js';
/**
* URL验证请求参数
*/
interface WechatMpVerifyQuery {
signature: string; // 微信加密签名
timestamp: string; // 时间戳
nonce: string; // 随机数
echostr: string; // 随机字符串
}
/**
* 消息推送回调请求参数
*/
interface WechatMpCallbackQuery {
signature: string; // 微信加密签名
timestamp: string; // 时间戳
nonce: string; // 随机数
}
/**
* 微信服务号回调控制器(明文模式)
*/
export class PatientWechatCallbackPlainController {
private token: string;
constructor() {
// 从环境变量读取Token
this.token = process.env.WECHAT_MP_TOKEN || '';
if (!this.token) {
logger.error('⚠️ WECHAT_MP_TOKEN 未配置!');
throw new Error('微信服务号Token未配置');
}
logger.info('✅ 微信服务号回调控制器已初始化(明文模式)', {
token: this.token.substring(0, 10) + '...',
});
}
/**
* 处理URL验证GET请求
*
* 微信服务器会发送GET请求验证服务器地址的有效性
* GET /wechat/patient/callback?signature=xxx&timestamp=xxx&nonce=xxx&echostr=xxx
*
* 验证步骤:
* 1. 将token、timestamp、nonce三个参数进行字典序排序
* 2. 将三个参数字符串拼接成一个字符串进行sha1加密
* 3. 将加密后的字符串与signature对比如果相同则返回echostr
*/
async handleVerification(
request: FastifyRequest<{ Querystring: WechatMpVerifyQuery }>,
reply: FastifyReply
): Promise<void> {
const { signature, timestamp, nonce, echostr } = request.query;
logger.info('📥 收到微信服务号 URL 验证请求(明文模式)', {
signature,
timestamp,
nonce,
echostr: echostr ? echostr.substring(0, 20) + '...' : undefined,
});
try {
// 验证签名
const isValid = this.verifySignature(signature, timestamp, nonce);
if (!isValid) {
logger.error('❌ URL 验证失败:签名不匹配', {
signature,
timestamp,
nonce,
});
reply.code(403).send('Signature verification failed');
return;
}
logger.info('✅ URL 验证成功,返回 echostr', {
echostr: echostr.substring(0, 20) + '...',
});
// 验证成功原样返回echostr纯文本
reply.type('text/plain').send(echostr);
} catch (error) {
logger.error('❌ URL 验证异常', {
error: error instanceof Error ? error.message : String(error),
stack: error instanceof Error ? error.stack : undefined,
});
reply.code(500).send('Internal Server Error');
}
}
/**
* 处理消息推送回调POST请求
*
* 微信服务器推送消息时会发送POST请求
* POST /wechat/patient/callback?signature=xxx&timestamp=xxx&nonce=xxx
* Body: XML格式的消息内容明文模式
*/
async handleCallback(
request: FastifyRequest<{
Querystring: WechatMpCallbackQuery;
Body: string; // XML格式字符串
}>,
reply: FastifyReply
): Promise<void> {
const { signature, timestamp, nonce } = request.query;
const body = request.body;
logger.info('📥 收到微信服务号回调消息(明文模式)', {
signature,
timestamp,
nonce,
bodyType: typeof body,
bodyLength: JSON.stringify(body).length,
});
try {
// 验证签名
const isValid = this.verifySignature(signature, timestamp, nonce);
if (!isValid) {
logger.error('❌ 消息推送验证失败:签名不匹配', {
signature,
timestamp,
nonce,
});
reply.code(403).send('Signature verification failed');
return;
}
logger.info('✅ 签名验证成功');
// 立即返回success5秒内必须返回
reply.type('text/plain').send('success');
// 异步处理消息
this.processMessageAsync(body).catch((error) => {
logger.error('❌ 异步消息处理失败', {
error: error instanceof Error ? error.message : String(error),
});
});
} catch (error) {
logger.error('❌ 消息推送处理异常', {
error: error instanceof Error ? error.message : String(error),
stack: error instanceof Error ? error.stack : undefined,
});
reply.code(500).send('Internal Server Error');
}
}
/**
* 验证微信签名(明文模式)
*
* 签名计算方法:
* 1. 将token、timestamp、nonce三个参数进行字典序排序
* 2. 将三个参数字符串拼接成一个字符串
* 3. 进行sha1加密
* 4. 与signature参数对比
*
* @param signature 微信传递的签名
* @param timestamp 时间戳
* @param nonce 随机数
* @returns 验证结果
*/
private verifySignature(signature: string, timestamp: string, nonce: string): boolean {
try {
// 1. 字典序排序
const arr = [this.token, timestamp, nonce].sort();
// 2. 拼接字符串
const str = arr.join('');
// 3. SHA1加密
const hash = crypto.createHash('sha1').update(str).digest('hex');
logger.debug('🔐 签名验证详情', {
token: this.token.substring(0, 10) + '...',
timestamp,
nonce,
sortedArray: arr.map(s => s.substring(0, 10) + (s.length > 10 ? '...' : '')),
concatenatedString: str.substring(0, 50) + (str.length > 50 ? '...' : ''),
calculatedHash: hash,
receivedSignature: signature,
isMatch: hash === signature,
});
// 4. 对比签名
return hash === signature;
} catch (error) {
logger.error('❌ 签名验证计算失败', {
error: error instanceof Error ? error.message : String(error),
});
return false;
}
}
/**
* 异步处理消息
*
* 在5秒内返回success后异步处理实际的业务逻辑
*
* @param body 消息体XML格式字符串
*/
private async processMessageAsync(body: string): Promise<void> {
try {
logger.info('📝 开始异步处理消息', {
bodyType: typeof body,
bodyPreview: typeof body === 'string' ? body.substring(0, 200) : JSON.stringify(body).substring(0, 200),
});
// 解析XML消息体
const message = this.parseXmlMessage(body);
// 判断消息类型
if (message.MsgType === 'event') {
// 事件消息
logger.info('🎯 处理事件消息', {
event: message.Event,
fromUser: message.FromUserName,
});
// TODO: 根据不同的事件类型进行处理
switch (message.Event) {
case 'subscribe':
logger.info('👤 用户关注公众号', { openid: message.FromUserName });
break;
case 'unsubscribe':
logger.info('👋 用户取消关注公众号', { openid: message.FromUserName });
break;
default:
logger.info('📌 其他事件', { event: message.Event });
}
} else if (message.MsgType === 'text') {
// 文本消息
logger.info('💬 处理文本消息', {
fromUser: message.FromUserName,
content: message.Content,
});
// TODO: 调用ChatService处理文本消息回复患者
} else {
logger.info('📦 其他类型消息', { msgType: message.MsgType });
}
logger.info('✅ 消息处理完成');
} catch (error) {
logger.error('❌ 消息处理失败', {
error: error instanceof Error ? error.message : String(error),
stack: error instanceof Error ? error.stack : undefined,
});
}
}
/**
* 解析XML消息体
*
* 微信推送的消息格式(明文模式):
* <xml>
* <ToUserName><![CDATA[gh_xxx]]></ToUserName>
* <FromUserName><![CDATA[oXXX]]></FromUserName>
* <CreateTime>1234567890</CreateTime>
* <MsgType><![CDATA[text]]></MsgType>
* <Content><![CDATA[你好]]></Content>
* </xml>
*
* @param xml XML字符串
* @returns 解析后的消息对象
*/
private parseXmlMessage(xml: string): any {
try {
const message: any = {};
// 简单的XML解析提取标签内容
const extractTag = (tagName: string): string | undefined => {
// 匹配 <tagName><![CDATA[...]]></tagName> 或 <tagName>...</tagName>
const cdataMatch = xml.match(new RegExp(`<${tagName}><!\\[CDATA\\[([^\\]]+)\\]\\]><\\/${tagName}>`));
if (cdataMatch) return cdataMatch[1];
const textMatch = xml.match(new RegExp(`<${tagName}>([^<]+)<\\/${tagName}>`));
if (textMatch) return textMatch[1];
return undefined;
};
// 提取常见字段
message.ToUserName = extractTag('ToUserName');
message.FromUserName = extractTag('FromUserName');
message.CreateTime = extractTag('CreateTime');
message.MsgType = extractTag('MsgType');
message.MsgId = extractTag('MsgId');
// 根据消息类型提取特定字段
if (message.MsgType === 'text') {
message.Content = extractTag('Content');
} else if (message.MsgType === 'event') {
message.Event = extractTag('Event');
message.EventKey = extractTag('EventKey');
}
logger.debug('📋 XML解析结果', {
message,
});
return message;
} catch (error) {
logger.error('❌ XML解析失败', {
error: error instanceof Error ? error.message : String(error),
xml: xml.substring(0, 200),
});
return {};
}
}
}

View File

@@ -530,3 +530,7 @@ URL: https://iit.xunzhengyixue.com/api/v1/iit/patient-wechat/callback
**最后更新**2026-01-04
**文档版本**v1.0

View File

@@ -165,3 +165,7 @@ console.log('');
console.log('🎉 生成完成!');
console.log('');

View File

@@ -6,6 +6,7 @@ import { FastifyInstance } from 'fastify';
import { WebhookController } from '../controllers/WebhookController.js';
import { wechatCallbackController } from '../controllers/WechatCallbackController.js';
import { patientWechatCallbackController } from '../controllers/PatientWechatCallbackController.js';
import { PatientWechatCallbackPlainController } from '../controllers/PatientWechatCallbackPlainController.js';
import { SyncManager } from '../services/SyncManager.js';
import { logger } from '../../../common/logging/index.js';
@@ -13,6 +14,33 @@ export async function registerIitRoutes(fastify: FastifyInstance) {
// 初始化控制器和服务
const webhookController = new WebhookController();
const syncManager = new SyncManager();
const patientWechatCallbackPlainController = new PatientWechatCallbackPlainController();
// 添加XML内容解析器用于微信公众号明文模式
// 安全注册:避免重复注册错误
try {
fastify.addContentTypeParser('text/xml', { parseAs: 'string' }, function (req, body, done) {
done(null, body);
});
} catch (error: any) {
// 解析器可能已存在,忽略错误
if (error.code !== 'FST_ERR_CTP_ALREADY_PRESENT') {
throw error;
}
logger.debug('text/xml parser already exists, skipping');
}
try {
fastify.addContentTypeParser('application/xml', { parseAs: 'string' }, function (req, body, done) {
done(null, body);
});
} catch (error: any) {
// 解析器可能已存在,忽略错误
if (error.code !== 'FST_ERR_CTP_ALREADY_PRESENT') {
throw error;
}
logger.debug('application/xml parser already exists, skipping');
}
// =============================================
// 健康检查
@@ -240,18 +268,8 @@ export async function registerIitRoutes(fastify: FastifyInstance) {
// 企业微信回调路由
// =============================================
// 注text/xml解析器(企业微信回调使用此格式
fastify.addContentTypeParser(
'text/xml',
{ parseAs: 'string' },
(req, body, done) => {
// 企业微信发送的是XML字符串直接返回字符串即可
// 在控制器中使用xml2js进行解析
done(null, body);
}
);
logger.info('Registered content parser: text/xml');
// 注意:text/xml解析器已在文件开头注册(通用于企业微信和微信服务号
logger.info('Using shared text/xml parser for WeChat Work callbacks');
// GET: URL验证企业微信配置回调URL时使用
fastify.get(
@@ -300,8 +318,56 @@ export async function registerIitRoutes(fastify: FastifyInstance) {
// 微信服务号回调路由(患者端)
// =============================================
// 简化路由(用于微信公众平台配置,路径更短)
// GET: URL验证
// ===== 明文模式Plain Text Mode =====
// 推荐:先用明文模式测试基础功能,成功后再切换到安全模式
// GET: URL验证明文模式
fastify.get(
'/wechat/patient/callback-plain',
{
schema: {
querystring: {
type: 'object',
properties: {
signature: { type: 'string' },
timestamp: { type: 'string' },
nonce: { type: 'string' },
echostr: { type: 'string' }
},
additionalProperties: true
}
}
},
patientWechatCallbackPlainController.handleVerification.bind(patientWechatCallbackPlainController)
);
logger.info('Registered route: GET /wechat/patient/callback-plain (明文模式)');
// POST: 接收消息明文模式XML格式
// 注意微信推送的是XML格式的消息体
fastify.post(
'/wechat/patient/callback-plain',
{
schema: {
querystring: {
type: 'object',
properties: {
signature: { type: 'string' },
timestamp: { type: 'string' },
nonce: { type: 'string' }
},
additionalProperties: true
}
}
},
patientWechatCallbackPlainController.handleCallback.bind(patientWechatCallbackPlainController)
);
logger.info('Registered route: POST /wechat/patient/callback-plain (明文模式, XML)');
// ===== 安全模式Secure Mode / AES加密 =====
// GET: URL验证安全模式
// 注意不使用required字段避免Fastify过早拦截微信的请求
fastify.get(
'/wechat/patient/callback',
@@ -323,9 +389,9 @@ export async function registerIitRoutes(fastify: FastifyInstance) {
patientWechatCallbackController.handleVerification.bind(patientWechatCallbackController)
);
logger.info('Registered route: GET /wechat/patient/callback');
logger.info('Registered route: GET /wechat/patient/callback (安全模式)');
// POST: 接收消息
// POST: 接收消息(安全模式)
// 注意不使用required字段避免Fastify过早拦截微信的请求
fastify.post(
'/wechat/patient/callback',
@@ -348,7 +414,7 @@ export async function registerIitRoutes(fastify: FastifyInstance) {
patientWechatCallbackController.handleCallback.bind(patientWechatCallbackController)
);
logger.info('Registered route: POST /wechat/patient/callback');
logger.info('Registered route: POST /wechat/patient/callback (安全模式)');
// 完整路由(兼容旧配置,保留)
// GET: URL验证微信服务号配置回调URL时使用

View File

@@ -482,3 +482,7 @@ export class PatientWechatService {
// 导出单例
export const patientWechatService = new PatientWechatService();

View File

@@ -127,3 +127,7 @@ testDifyIntegration().catch(error => {
});

View File

@@ -156,3 +156,7 @@ testIitDatabase()

View File

@@ -142,3 +142,7 @@ if (hasError) {
console.log('\n');
}

View File

@@ -168,3 +168,7 @@ async function testUrlVerification() {
}
})();

View File

@@ -249,3 +249,7 @@ main().catch((error) => {

View File

@@ -133,3 +133,7 @@ try {
Write-Host ""

View File

@@ -0,0 +1,119 @@
# Test WeChat Official Account - Plain Text Mode URL Verification
# Configuration
$Token = "IitPatientWechat2026JanToken"
$Timestamp = [DateTimeOffset]::UtcNow.ToUnixTimeSeconds().ToString()
$Nonce = Get-Random -Minimum 100000000 -Maximum 999999999
$Echostr = "test_echo_string_$(Get-Random)"
Write-Host "================================" -ForegroundColor Cyan
Write-Host "WeChat MP Plain Mode URL Verification Test" -ForegroundColor Cyan
Write-Host "================================" -ForegroundColor Cyan
Write-Host ""
Write-Host "Configuration:" -ForegroundColor Yellow
Write-Host " Token: $Token"
Write-Host " Timestamp: $Timestamp"
Write-Host " Nonce: $Nonce"
Write-Host " Echostr: $Echostr"
Write-Host ""
# Calculate signature
Write-Host "Calculating signature..." -ForegroundColor Yellow
# 1. Sort by dictionary order
$sortedArray = @($Token, $Timestamp, $Nonce) | Sort-Object
Write-Host " Sorted array: [$($sortedArray -join ', ')]"
# 2. Concatenate string
$concatenatedString = $sortedArray -join ''
Write-Host " Concatenated: $concatenatedString"
# 3. SHA1 hash
$sha1 = [System.Security.Cryptography.SHA1]::Create()
$bytes = [System.Text.Encoding]::UTF8.GetBytes($concatenatedString)
$hashBytes = $sha1.ComputeHash($bytes)
$signature = [System.BitConverter]::ToString($hashBytes).Replace("-", "").ToLower()
Write-Host " SHA1 signature: $signature" -ForegroundColor Green
Write-Host ""
# Test 1: Local server (via natapp)
Write-Host "Test 1: Local server (via natapp)" -ForegroundColor Cyan
Write-Host "----------------------------------------"
$localUrl = "https://devlocal.xunzhengyixue.com/wechat/patient/callback-plain"
$localFullUrl = "{0}?signature={1}`&timestamp={2}`&nonce={3}`&echostr={4}" -f $localUrl, $signature, $Timestamp, $Nonce, $Echostr
Write-Host " Request URL: $localFullUrl" -ForegroundColor Gray
try {
$response = Invoke-WebRequest -Uri $localFullUrl -Method Get -UseBasicParsing
if ($response.StatusCode -eq 200) {
Write-Host " SUCCESS (200 OK)" -ForegroundColor Green
Write-Host " Response content: $($response.Content)" -ForegroundColor Green
if ($response.Content -eq $Echostr) {
Write-Host " echostr MATCHED!" -ForegroundColor Green
} else {
Write-Host " echostr MISMATCH!" -ForegroundColor Red
Write-Host " Expected: $Echostr" -ForegroundColor Red
Write-Host " Actual: $($response.Content)" -ForegroundColor Red
}
} else {
Write-Host " FAILED ($($response.StatusCode))" -ForegroundColor Red
}
} catch {
Write-Host " ERROR: $($_.Exception.Message)" -ForegroundColor Red
if ($_.Exception.Response) {
$statusCode = [int]$_.Exception.Response.StatusCode
Write-Host " HTTP Status: $statusCode" -ForegroundColor Red
}
}
Write-Host ""
# Test 2: Direct localhost (localhost:3001)
Write-Host "Test 2: Direct localhost (localhost:3001)" -ForegroundColor Cyan
Write-Host "----------------------------------------"
$localhostUrl = "http://localhost:3001/wechat/patient/callback-plain"
$localhostFullUrl = "{0}?signature={1}`&timestamp={2}`&nonce={3}`&echostr={4}" -f $localhostUrl, $signature, $Timestamp, $Nonce, $Echostr
Write-Host " Request URL: $localhostFullUrl" -ForegroundColor Gray
try {
$response = Invoke-WebRequest -Uri $localhostFullUrl -Method Get -UseBasicParsing
if ($response.StatusCode -eq 200) {
Write-Host " SUCCESS (200 OK)" -ForegroundColor Green
Write-Host " Response content: $($response.Content)" -ForegroundColor Green
if ($response.Content -eq $Echostr) {
Write-Host " echostr MATCHED!" -ForegroundColor Green
} else {
Write-Host " echostr MISMATCH!" -ForegroundColor Red
Write-Host " Expected: $Echostr" -ForegroundColor Red
Write-Host " Actual: $($response.Content)" -ForegroundColor Red
}
} else {
Write-Host " FAILED ($($response.StatusCode))" -ForegroundColor Red
}
} catch {
Write-Host " ERROR: $($_.Exception.Message)" -ForegroundColor Red
if ($_.Exception.Response) {
$statusCode = [int]$_.Exception.Response.StatusCode
Write-Host " HTTP Status: $statusCode" -ForegroundColor Red
}
}
Write-Host ""
Write-Host "================================" -ForegroundColor Cyan
Write-Host "Next Step: WeChat MP Configuration" -ForegroundColor Green
Write-Host "================================" -ForegroundColor Cyan
Write-Host ""
Write-Host "If local tests passed, configure in WeChat MP:" -ForegroundColor Yellow
Write-Host ""
Write-Host " URL: https://devlocal.xunzhengyixue.com/wechat/patient/callback-plain" -ForegroundColor Cyan
Write-Host " Token: $Token" -ForegroundColor Cyan
Write-Host " Encryption Mode: Plain Text Mode" -ForegroundColor Cyan
Write-Host " Data Format: XML (IMPORTANT!)" -ForegroundColor Cyan
Write-Host ""

View File

@@ -226,3 +226,7 @@ export interface CachedProtocolRules {

View File

@@ -0,0 +1,428 @@
/**
* Phase 3: 批处理模式 - 批处理控制器
*
* API路由
* - POST /api/v1/batch/execute - 执行批处理任务
* - GET /api/v1/batch/tasks/:taskId - 获取任务状态
* - GET /api/v1/batch/tasks/:taskId/results - 获取任务结果
* - POST /api/v1/batch/tasks/:taskId/retry-failed - 重试失败项
*/
import { FastifyRequest, FastifyReply } from 'fastify';
import { executeBatchTask, retryFailedDocuments, BatchProgress } from '../services/batchService.js';
import { prisma } from '../../../config/database.js';
import { ModelType } from '../../../common/llm/adapters/types.js';
// ==================== 类型定义 ====================
interface ExecuteBatchBody {
kb_id: string;
document_ids: string[];
template_type: 'preset' | 'custom';
template_id?: string;
custom_prompt?: string;
model_type: ModelType;
task_name?: string;
}
interface TaskIdParams {
taskId: string;
}
// ==================== API处理器 ====================
/**
* POST /api/v1/batch/execute
* 执行批处理任务
*/
export async function executeBatch(
request: FastifyRequest<{ Body: ExecuteBatchBody }>,
reply: FastifyReply
) {
try {
// TODO: 从JWT获取userId
const userId = 'user-mock-001';
const {
kb_id,
document_ids,
template_type,
template_id,
custom_prompt,
model_type,
task_name,
} = request.body;
console.log('📦 [BatchController] 收到批处理请求', {
userId,
kbId: kb_id,
documentCount: document_ids.length,
templateType: template_type,
modelType: model_type,
});
// 验证参数
if (!kb_id || !document_ids || document_ids.length === 0) {
return reply.code(400).send({
success: false,
message: '缺少必要参数kb_id 或 document_ids',
});
}
if (document_ids.length < 3) {
return reply.code(400).send({
success: false,
message: '文献数量不能少于3篇',
});
}
if (document_ids.length > 50) {
return reply.code(400).send({
success: false,
message: '文献数量不能超过50篇',
});
}
if (template_type === 'preset' && !template_id) {
return reply.code(400).send({
success: false,
message: '预设模板类型需要提供 template_id',
});
}
if (template_type === 'custom' && !custom_prompt) {
return reply.code(400).send({
success: false,
message: '自定义模板需要提供 custom_prompt',
});
}
// 验证模型类型
const validModels: ModelType[] = ['deepseek-v3', 'qwen3-72b', 'qwen-long'];
if (!validModels.includes(model_type)) {
return reply.code(400).send({
success: false,
message: `不支持的模型类型: ${model_type}`,
});
}
// 验证知识库是否存在
const kb = await prisma.knowledgeBase.findUnique({
where: { id: kb_id },
});
if (!kb) {
return reply.code(404).send({
success: false,
message: `知识库不存在: ${kb_id}`,
});
}
// 验证文档是否都存在
const documents = await prisma.document.findMany({
where: {
id: { in: document_ids },
kbId: kb_id,
},
});
if (documents.length !== document_ids.length) {
return reply.code(400).send({
success: false,
message: `部分文档不存在或不属于该知识库`,
});
}
// 获取WebSocket实例用于进度推送
const io = (request.server as any).io;
// 先创建任务记录获取taskId
const taskPreview = await prisma.batchTask.create({
data: {
userId,
kbId: kb_id,
name: task_name || `批处理任务_${new Date().toLocaleString('zh-CN')}`,
templateType: template_type,
templateId: template_id || null,
prompt: custom_prompt || template_id || '',
status: 'processing',
totalDocuments: document_ids.length,
modelType: model_type,
concurrency: 3,
startedAt: new Date(),
},
});
const taskId = taskPreview.id;
console.log(`✅ [BatchController] 创建任务: ${taskId}`);
// 执行批处理任务(异步)
executeBatchTask({
userId,
kbId: kb_id,
documentIds: document_ids,
templateType: template_type,
templateId: template_id,
customPrompt: custom_prompt,
modelType: model_type,
taskName: task_name,
existingTaskId: taskId, // 使用已创建的任务ID
onProgress: (progress: BatchProgress) => {
// WebSocket推送进度
if (io) {
io.to(userId).emit('batch-progress', progress);
}
},
})
.then((result) => {
console.log(`🎉 [BatchController] 批处理任务完成: ${result.taskId}`);
// 推送完成事件
if (io) {
io.to(userId).emit('batch-completed', {
task_id: result.taskId,
status: result.status,
});
}
})
.catch((error) => {
console.error(`❌ [BatchController] 批处理任务失败:`, error);
// 推送失败事件
if (io) {
io.to(userId).emit('batch-failed', {
task_id: 'unknown',
error: error.message,
});
}
});
// 立即返回任务ID任务在后台执行
reply.send({
success: true,
message: '批处理任务已开始',
data: {
task_id: taskId,
status: 'processing',
websocket_event: 'batch-progress',
},
});
} catch (error: any) {
console.error('❌ [BatchController] 执行批处理失败:', error);
reply.code(500).send({
success: false,
message: error.message || '执行批处理任务失败',
});
}
}
/**
* GET /api/v1/batch/tasks/:taskId
* 获取任务状态
*/
export async function getTask(
request: FastifyRequest<{ Params: TaskIdParams }>,
reply: FastifyReply
) {
try {
const { taskId } = request.params;
const task = await prisma.batchTask.findUnique({
where: { id: taskId },
select: {
id: true,
name: true,
status: true,
totalDocuments: true,
completedCount: true,
failedCount: true,
modelType: true,
startedAt: true,
completedAt: true,
durationSeconds: true,
createdAt: true,
},
});
if (!task) {
return reply.code(404).send({
success: false,
message: `任务不存在: ${taskId}`,
});
}
reply.send({
success: true,
data: {
id: task.id,
name: task.name,
status: task.status,
total_documents: task.totalDocuments,
completed_count: task.completedCount,
failed_count: task.failedCount,
model_type: task.modelType,
started_at: task.startedAt,
completed_at: task.completedAt,
duration_seconds: task.durationSeconds,
created_at: task.createdAt,
},
});
} catch (error: any) {
console.error('❌ [BatchController] 获取任务失败:', error);
reply.code(500).send({
success: false,
message: error.message || '获取任务失败',
});
}
}
/**
* GET /api/v1/batch/tasks/:taskId/results
* 获取任务结果
*/
export async function getTaskResults(
request: FastifyRequest<{ Params: TaskIdParams }>,
reply: FastifyReply
) {
try {
const { taskId } = request.params;
// 获取任务信息
const task = await prisma.batchTask.findUnique({
where: { id: taskId },
include: {
results: {
include: {
document: {
select: {
filename: true,
tokensCount: true,
},
},
},
orderBy: {
createdAt: 'asc',
},
},
},
});
if (!task) {
return reply.code(404).send({
success: false,
message: `任务不存在: ${taskId}`,
});
}
// 格式化结果
const results = task.results.map((r, index) => ({
id: r.id,
index: index + 1,
document_id: r.documentId,
document_name: r.document.filename,
status: r.status,
data: r.data,
raw_output: r.rawOutput,
error_message: r.errorMessage,
processing_time_ms: r.processingTimeMs,
tokens_used: r.tokensUsed,
created_at: r.createdAt,
}));
reply.send({
success: true,
data: {
task: {
id: task.id,
name: task.name,
status: task.status,
template_type: task.templateType,
template_id: task.templateId,
total_documents: task.totalDocuments,
completed_count: task.completedCount,
failed_count: task.failedCount,
duration_seconds: task.durationSeconds,
created_at: task.createdAt,
completed_at: task.completedAt,
},
results,
},
});
} catch (error: any) {
console.error('❌ [BatchController] 获取任务结果失败:', error);
reply.code(500).send({
success: false,
message: error.message || '获取任务结果失败',
});
}
}
/**
* POST /api/v1/batch/tasks/:taskId/retry-failed
* 重试失败的文档
*/
export async function retryFailed(
request: FastifyRequest<{ Params: TaskIdParams }>,
reply: FastifyReply
) {
try {
const { taskId } = request.params;
const userId = 'user-mock-001'; // TODO: 从JWT获取
// 获取WebSocket实例
const io = (request.server as any).io;
// 执行重试(异步)
retryFailedDocuments(taskId, (progress: BatchProgress) => {
if (io) {
io.to(userId).emit('batch-progress', progress);
}
})
.then((result) => {
console.log(`✅ [BatchController] 重试完成: ${result.retriedCount}`);
})
.catch((error) => {
console.error(`❌ [BatchController] 重试失败:`, error);
});
reply.send({
success: true,
message: '已开始重试失败的文档',
});
} catch (error: any) {
console.error('❌ [BatchController] 重试失败:', error);
reply.code(500).send({
success: false,
message: error.message || '重试失败',
});
}
}
/**
* GET /api/v1/batch/templates
* 获取所有预设模板
*/
export async function getTemplates(
request: FastifyRequest,
reply: FastifyReply
) {
try {
const { getAllTemplates } = await import('../templates/clinicalResearch.js');
const templates = getAllTemplates();
reply.send({
success: true,
data: templates.map(t => ({
id: t.id,
name: t.name,
description: t.description,
output_fields: t.outputFields,
})),
});
} catch (error: any) {
console.error('❌ [BatchController] 获取模板失败:', error);
reply.code(500).send({
success: false,
message: error.message || '获取模板失败',
});
}
}

View File

@@ -0,0 +1,314 @@
import type { FastifyRequest, FastifyReply } from 'fastify';
import * as documentService from '../services/documentService.js';
// Mock用户ID实际应从JWT token中获取
const MOCK_USER_ID = 'user-mock-001';
/**
* 上传文档
*/
export async function uploadDocument(
request: FastifyRequest<{
Params: {
kbId: string;
};
}>,
reply: FastifyReply
) {
try {
const { kbId } = request.params;
console.log(`📤 开始上传文档到知识库: ${kbId}`);
// 获取上传的文件
const data = await request.file();
if (!data) {
console.error('❌ 没有接收到文件');
return reply.status(400).send({
success: false,
message: 'No file uploaded',
});
}
console.log(`📄 接收到文件: ${data.filename}, 类型: ${data.mimetype}`);
const file = await data.toBuffer();
const filename = data.filename;
const fileType = data.mimetype;
const fileSizeBytes = file.length;
// 文件大小限制10MB
const maxSize = 10 * 1024 * 1024;
console.log(`📊 文件大小: ${(fileSizeBytes / 1024 / 1024).toFixed(2)}MB (限制: 10MB)`);
if (fileSizeBytes > maxSize) {
console.error(`❌ 文件太大: ${(fileSizeBytes / 1024 / 1024).toFixed(2)}MB`);
return reply.status(400).send({
success: false,
message: 'File size exceeds 10MB limit',
});
}
// 文件类型限制
const allowedTypes = [
'application/pdf',
'application/msword',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'text/plain',
'text/markdown',
];
console.log(`🔍 检查文件类型: ${fileType}`);
if (!allowedTypes.includes(fileType)) {
console.error(`❌ 不支持的文件类型: ${fileType}`);
return reply.status(400).send({
success: false,
message: 'File type not supported. Allowed: PDF, DOC, DOCX, TXT, MD',
});
}
// 上传文档这里fileUrl暂时为空实际应该上传到对象存储
console.log(`⚙️ 调用文档服务上传文件...`);
const document = await documentService.uploadDocument(
MOCK_USER_ID,
kbId,
file,
filename,
fileType,
fileSizeBytes,
'' // fileUrl - 可以上传到OSS后填入
);
console.log(`✅ 文档上传成功: ${document.id}`);
return reply.status(201).send({
success: true,
data: document,
});
} catch (error: any) {
console.error('❌ 文档上传失败:', error.message);
console.error('错误详情:', error);
if (error.message.includes('not found') || error.message.includes('access denied')) {
return reply.status(404).send({
success: false,
message: error.message,
});
}
if (error.message.includes('limit exceeded')) {
return reply.status(400).send({
success: false,
message: error.message,
});
}
return reply.status(500).send({
success: false,
message: error.message || 'Failed to upload document',
});
}
}
/**
* 获取文档列表
*/
export async function getDocuments(
request: FastifyRequest<{
Params: {
kbId: string;
};
}>,
reply: FastifyReply
) {
try {
const { kbId } = request.params;
const documents = await documentService.getDocuments(MOCK_USER_ID, kbId);
return reply.send({
success: true,
data: documents,
});
} catch (error: any) {
console.error('Failed to get documents:', error);
if (error.message.includes('not found')) {
return reply.status(404).send({
success: false,
message: error.message,
});
}
return reply.status(500).send({
success: false,
message: error.message || 'Failed to get documents',
});
}
}
/**
* 获取文档详情
*/
export async function getDocumentById(
request: FastifyRequest<{
Params: {
id: string;
};
}>,
reply: FastifyReply
) {
try {
const { id } = request.params;
const document = await documentService.getDocumentById(MOCK_USER_ID, id);
return reply.send({
success: true,
data: document,
});
} catch (error: any) {
console.error('Failed to get document:', error);
if (error.message.includes('not found')) {
return reply.status(404).send({
success: false,
message: error.message,
});
}
return reply.status(500).send({
success: false,
message: error.message || 'Failed to get document',
});
}
}
/**
* 删除文档
*/
export async function deleteDocument(
request: FastifyRequest<{
Params: {
id: string;
};
}>,
reply: FastifyReply
) {
try {
const { id } = request.params;
await documentService.deleteDocument(MOCK_USER_ID, id);
return reply.send({
success: true,
message: 'Document deleted successfully',
});
} catch (error: any) {
console.error('Failed to delete document:', error);
if (error.message.includes('not found')) {
return reply.status(404).send({
success: false,
message: error.message,
});
}
return reply.status(500).send({
success: false,
message: error.message || 'Failed to delete document',
});
}
}
/**
* 重新处理文档
*/
export async function reprocessDocument(
request: FastifyRequest<{
Params: {
id: string;
};
}>,
reply: FastifyReply
) {
try {
const { id } = request.params;
await documentService.reprocessDocument(MOCK_USER_ID, id);
return reply.send({
success: true,
message: 'Document reprocessing started',
});
} catch (error: any) {
console.error('Failed to reprocess document:', error);
if (error.message.includes('not found')) {
return reply.status(404).send({
success: false,
message: error.message,
});
}
return reply.status(500).send({
success: false,
message: error.message || 'Failed to reprocess document',
});
}
}
/**
* Phase 2: 获取文档全文(用于逐篇精读模式)
*/
export async function getDocumentFullText(
request: FastifyRequest<{
Params: {
id: string;
};
}>,
reply: FastifyReply
) {
try {
const { id } = request.params;
const document = await documentService.getDocumentById(MOCK_USER_ID, id);
// 返回完整的文档信息
return reply.send({
success: true,
data: {
documentId: document.id,
filename: document.filename,
fileType: document.fileType,
fileSizeBytes: document.fileSizeBytes,
extractedText: (document as any).extractedText || null,
charCount: (document as any).charCount || null,
tokensCount: document.tokensCount || null,
extractionMethod: (document as any).extractionMethod || null,
extractionQuality: (document as any).extractionQuality || null,
language: (document as any).language || null,
metadata: {
uploadedAt: document.uploadedAt,
processedAt: document.processedAt,
status: document.status,
},
},
});
} catch (error: any) {
console.error('Failed to get document full text:', error);
if (error.message.includes('not found')) {
return reply.status(404).send({
success: false,
message: error.message,
});
}
return reply.status(500).send({
success: false,
message: error.message || 'Failed to get document full text',
});
}
}

View File

@@ -0,0 +1,341 @@
import type { FastifyRequest, FastifyReply } from 'fastify';
import * as knowledgeBaseService from '../services/knowledgeBaseService.js';
// Mock用户ID实际应从JWT token中获取
const MOCK_USER_ID = 'user-mock-001';
/**
* 创建知识库
*/
export async function createKnowledgeBase(
request: FastifyRequest<{
Body: {
name: string;
description?: string;
};
}>,
reply: FastifyReply
) {
try {
const { name, description } = request.body;
if (!name || name.trim().length === 0) {
return reply.status(400).send({
success: false,
message: 'Knowledge base name is required',
});
}
const knowledgeBase = await knowledgeBaseService.createKnowledgeBase(
MOCK_USER_ID,
name,
description
);
return reply.status(201).send({
success: true,
data: knowledgeBase,
});
} catch (error: any) {
console.error('Failed to create knowledge base:', error);
return reply.status(500).send({
success: false,
message: error.message || 'Failed to create knowledge base',
});
}
}
/**
* 获取知识库列表
*/
export async function getKnowledgeBases(
_request: FastifyRequest,
reply: FastifyReply
) {
try {
const knowledgeBases = await knowledgeBaseService.getKnowledgeBases(
MOCK_USER_ID
);
return reply.send({
success: true,
data: knowledgeBases,
});
} catch (error: any) {
console.error('Failed to get knowledge bases:', error);
return reply.status(500).send({
success: false,
message: error.message || 'Failed to get knowledge bases',
});
}
}
/**
* 获取知识库详情
*/
export async function getKnowledgeBaseById(
request: FastifyRequest<{
Params: {
id: string;
};
}>,
reply: FastifyReply
) {
try {
const { id } = request.params;
const knowledgeBase = await knowledgeBaseService.getKnowledgeBaseById(
MOCK_USER_ID,
id
);
return reply.send({
success: true,
data: knowledgeBase,
});
} catch (error: any) {
console.error('Failed to get knowledge base:', error);
if (error.message.includes('not found')) {
return reply.status(404).send({
success: false,
message: error.message,
});
}
return reply.status(500).send({
success: false,
message: error.message || 'Failed to get knowledge base',
});
}
}
/**
* 更新知识库
*/
export async function updateKnowledgeBase(
request: FastifyRequest<{
Params: {
id: string;
};
Body: {
name?: string;
description?: string;
};
}>,
reply: FastifyReply
) {
try {
const { id } = request.params;
const updateData = request.body;
const knowledgeBase = await knowledgeBaseService.updateKnowledgeBase(
MOCK_USER_ID,
id,
updateData
);
return reply.send({
success: true,
data: knowledgeBase,
});
} catch (error: any) {
console.error('Failed to update knowledge base:', error);
if (error.message.includes('not found')) {
return reply.status(404).send({
success: false,
message: error.message,
});
}
return reply.status(500).send({
success: false,
message: error.message || 'Failed to update knowledge base',
});
}
}
/**
* 删除知识库
*/
export async function deleteKnowledgeBase(
request: FastifyRequest<{
Params: {
id: string;
};
}>,
reply: FastifyReply
) {
try {
const { id } = request.params;
await knowledgeBaseService.deleteKnowledgeBase(MOCK_USER_ID, id);
return reply.send({
success: true,
message: 'Knowledge base deleted successfully',
});
} catch (error: any) {
console.error('Failed to delete knowledge base:', error);
if (error.message.includes('not found')) {
return reply.status(404).send({
success: false,
message: error.message,
});
}
return reply.status(500).send({
success: false,
message: error.message || 'Failed to delete knowledge base',
});
}
}
/**
* 检索知识库
*/
export async function searchKnowledgeBase(
request: FastifyRequest<{
Params: {
id: string;
};
Querystring: {
query: string;
top_k?: string;
};
}>,
reply: FastifyReply
) {
try {
const { id } = request.params;
const { query, top_k } = request.query;
if (!query || query.trim().length === 0) {
return reply.status(400).send({
success: false,
message: 'Query parameter is required',
});
}
const topK = top_k ? parseInt(top_k, 10) : 15; // Phase 1优化默认从3增加到15
const results = await knowledgeBaseService.searchKnowledgeBase(
MOCK_USER_ID,
id,
query,
topK
);
return reply.send({
success: true,
data: results,
});
} catch (error: any) {
console.error('Failed to search knowledge base:', error);
if (error.message.includes('not found')) {
return reply.status(404).send({
success: false,
message: error.message,
});
}
return reply.status(500).send({
success: false,
message: error.message || 'Failed to search knowledge base',
});
}
}
/**
* 获取知识库统计信息
*/
export async function getKnowledgeBaseStats(
request: FastifyRequest<{
Params: {
id: string;
};
}>,
reply: FastifyReply
) {
try {
const { id } = request.params;
const stats = await knowledgeBaseService.getKnowledgeBaseStats(
MOCK_USER_ID,
id
);
return reply.send({
success: true,
data: stats,
});
} catch (error: any) {
console.error('Failed to get knowledge base stats:', error);
if (error.message.includes('not found')) {
return reply.status(404).send({
success: false,
message: error.message,
});
}
return reply.status(500).send({
success: false,
message: error.message || 'Failed to get knowledge base stats',
});
}
}
/**
* 获取知识库文档选择Phase 2: 全文阅读模式)
*/
export async function getDocumentSelection(
request: FastifyRequest<{
Params: {
id: string;
};
Querystring: {
max_files?: string;
max_tokens?: string;
};
}>,
reply: FastifyReply
) {
try {
const { id } = request.params;
const { max_files, max_tokens } = request.query;
const maxFiles = max_files ? parseInt(max_files, 10) : undefined;
const maxTokens = max_tokens ? parseInt(max_tokens, 10) : undefined;
const selection = await knowledgeBaseService.getDocumentSelection(
MOCK_USER_ID,
id,
maxFiles,
maxTokens
);
return reply.send({
success: true,
data: selection,
});
} catch (error: any) {
console.error('Failed to get document selection:', error);
if (error.message.includes('not found')) {
return reply.status(404).send({
success: false,
message: error.message,
});
}
return reply.status(500).send({
success: false,
message: error.message || 'Failed to get document selection',
});
}
}

View File

@@ -0,0 +1,12 @@
/**
* PKB个人知识库模块入口
*
* 功能:
* - 知识库CRUD
* - 文档上传和管理
* - RAG检索
* - 批处理任务
*/
export { default as pkbRoutes } from './routes/index.js';

View File

@@ -0,0 +1,38 @@
/**
* Phase 3: 批处理模式 - 路由配置
*/
import { FastifyInstance } from 'fastify';
import {
executeBatch,
getTask,
getTaskResults,
retryFailed,
getTemplates,
} from '../controllers/batchController.js';
export default async function batchRoutes(fastify: FastifyInstance) {
// 执行批处理任务
fastify.post('/batch/execute', executeBatch);
// 获取任务状态
fastify.get('/batch/tasks/:taskId', getTask);
// 获取任务结果
fastify.get('/batch/tasks/:taskId/results', getTaskResults);
// 重试失败的文档
fastify.post('/batch/tasks/:taskId/retry-failed', retryFailed);
// 获取所有预设模板
fastify.get('/batch/templates', getTemplates);
}

View File

@@ -0,0 +1,45 @@
/**
* PKB模块健康检查路由
*/
import { FastifyInstance, FastifyRequest, FastifyReply } from 'fastify';
import { prisma } from '../../../config/database.js';
export default async function healthRoutes(fastify: FastifyInstance) {
// PKB模块健康检查
fastify.get('/health', async (request: FastifyRequest, reply: FastifyReply) => {
try {
// 检查数据库连接
await prisma.$queryRaw`SELECT 1`;
// 检查pkb_schema是否可访问
const kbCount = await prisma.knowledgeBase.count();
return reply.send({
status: 'ok',
module: 'pkb',
version: 'v2',
timestamp: new Date().toISOString(),
database: {
connected: true,
schema: 'pkb_schema',
knowledgeBases: kbCount,
},
message: 'PKB模块运行正常',
});
} catch (error: any) {
return reply.status(503).send({
status: 'error',
module: 'pkb',
version: 'v2',
timestamp: new Date().toISOString(),
database: {
connected: false,
error: error.message,
},
message: 'PKB模块健康检查失败',
});
}
});
}

View File

@@ -0,0 +1,19 @@
/**
* PKB模块路由入口
*/
import { FastifyInstance } from 'fastify';
import knowledgeBaseRoutes from './knowledgeBases.js';
import batchRoutes from './batchRoutes.js';
import healthRoutes from './health.js';
export default async function pkbRoutes(fastify: FastifyInstance) {
// 健康检查路由
fastify.register(healthRoutes);
// 注册知识库路由
fastify.register(knowledgeBaseRoutes, { prefix: '/knowledge' });
// 注册批处理路由
fastify.register(batchRoutes, { prefix: '/batch-tasks' });
}

View File

@@ -0,0 +1,53 @@
import type { FastifyInstance } from 'fastify';
import * as knowledgeBaseController from '../controllers/knowledgeBaseController.js';
import * as documentController from '../controllers/documentController.js';
export default async function knowledgeBaseRoutes(fastify: FastifyInstance) {
// ==================== 知识库管理 API ====================
// 创建知识库
fastify.post('/knowledge-bases', knowledgeBaseController.createKnowledgeBase);
// 获取知识库列表
fastify.get('/knowledge-bases', knowledgeBaseController.getKnowledgeBases);
// 获取知识库详情
fastify.get('/knowledge-bases/:id', knowledgeBaseController.getKnowledgeBaseById);
// 更新知识库
fastify.put('/knowledge-bases/:id', knowledgeBaseController.updateKnowledgeBase);
// 删除知识库
fastify.delete('/knowledge-bases/:id', knowledgeBaseController.deleteKnowledgeBase);
// 检索知识库
fastify.get('/knowledge-bases/:id/search', knowledgeBaseController.searchKnowledgeBase);
// 获取知识库统计信息
fastify.get('/knowledge-bases/:id/stats', knowledgeBaseController.getKnowledgeBaseStats);
// Phase 2: 获取文档选择(全文阅读模式)
fastify.get('/knowledge-bases/:id/document-selection', knowledgeBaseController.getDocumentSelection);
// ==================== 文档管理 API ====================
// 上传文档
fastify.post('/knowledge-bases/:kbId/documents', documentController.uploadDocument);
// 获取文档列表
fastify.get('/knowledge-bases/:kbId/documents', documentController.getDocuments);
// 获取文档详情
fastify.get('/documents/:id', documentController.getDocumentById);
// Phase 2: 获取文档全文
fastify.get('/documents/:id/full-text', documentController.getDocumentFullText);
// 删除文档
fastify.delete('/documents/:id', documentController.deleteDocument);
// 重新处理文档
fastify.post('/documents/:id/reprocess', documentController.reprocessDocument);
}

View File

@@ -0,0 +1,420 @@
/**
* Phase 3: 批处理模式 - 批处理服务
*
* 核心功能:
* 1. 执行批处理任务3并发
* 2. 处理单个文档
* 3. 进度推送WebSocket
* 4. 错误处理和重试
*/
import PQueue from 'p-queue';
import { prisma } from '../../../config/database.js';
import { LLMFactory } from '../../../common/llm/adapters/LLMFactory.js';
import { ModelType } from '../../../common/llm/adapters/types.js';
import { getTemplate } from '../../../legacy/templates/clinicalResearch.js';
import { parseJSON } from '../../../common/utils/jsonParser.js';
export interface ExecuteBatchTaskParams {
userId: string;
kbId: string;
documentIds: string[];
templateType: 'preset' | 'custom';
templateId?: string; // 预设模板ID
customPrompt?: string; // 自定义提示词
modelType: ModelType;
taskName?: string;
existingTaskId?: string; // 已存在的任务ID可选
onProgress?: (progress: BatchProgress) => void;
}
export interface BatchProgress {
taskId: string;
completed: number;
total: number;
failed: number;
currentDocument?: string;
estimatedSeconds?: number;
}
export interface BatchTaskResult {
taskId: string;
status: 'processing' | 'completed' | 'failed';
totalDocuments: number;
completedCount: number;
failedCount: number;
durationSeconds?: number;
}
/**
* 执行批处理任务
*/
export async function executeBatchTask(
params: ExecuteBatchTaskParams
): Promise<BatchTaskResult> {
const {
userId,
kbId,
documentIds,
templateType,
templateId,
customPrompt,
modelType,
taskName,
existingTaskId,
onProgress,
} = params;
console.log('📦 [BatchService] 开始执行批处理任务', {
userId,
kbId,
documentCount: documentIds.length,
templateType,
modelType,
existingTaskId: existingTaskId || '新建',
});
// 验证文献数量 (3-50篇)
if (documentIds.length < 3 || documentIds.length > 50) {
throw new Error(`文献数量必须在3-50篇之间当前${documentIds.length}`);
}
// 获取模板或使用自定义提示词
let systemPrompt: string;
let userPromptTemplate: string;
let expectedFields: string[] = [];
if (templateType === 'preset') {
if (!templateId) {
throw new Error('预设模板类型需要提供templateId');
}
const template = getTemplate(templateId);
if (!template) {
throw new Error(`未找到模板: ${templateId}`);
}
systemPrompt = template.systemPrompt;
userPromptTemplate = template.userPrompt;
expectedFields = template.outputFields.map(f => f.key);
} else {
// 自定义模板
if (!customPrompt) {
throw new Error('自定义模板需要提供customPrompt');
}
systemPrompt = '你是一个专业的文献分析助手。请根据用户的要求分析文献内容。';
userPromptTemplate = customPrompt;
}
// 使用已存在的任务或创建新任务
let task;
if (existingTaskId) {
task = await prisma.batchTask.findUnique({
where: { id: existingTaskId },
});
if (!task) {
throw new Error(`任务不存在: ${existingTaskId}`);
}
console.log(`✅ [BatchService] 使用已存在的任务: ${task.id}`);
} else {
task = await prisma.batchTask.create({
data: {
userId,
kbId,
name: taskName || `批处理任务_${new Date().toLocaleString('zh-CN')}`,
templateType,
templateId: templateId || null,
prompt: userPromptTemplate,
status: 'processing',
totalDocuments: documentIds.length,
completedCount: 0,
failedCount: 0,
modelType,
concurrency: 3, // 固定3并发
startedAt: new Date(),
},
});
console.log(`✅ [BatchService] 创建任务记录: ${task.id}`);
}
const startTime = Date.now();
let completedCount = 0;
let failedCount = 0;
// 创建并发队列固定3并发
const queue = new PQueue({ concurrency: 3 });
// 处理所有文档
const promises = documentIds.map((docId, index) =>
queue.add(async () => {
try {
console.log(`🔄 [BatchService] 处理文档 ${index + 1}/${documentIds.length}: ${docId}`);
// 获取文档
const document = await prisma.document.findUnique({
where: { id: docId },
select: {
id: true,
filename: true,
extractedText: true,
tokensCount: true,
},
});
if (!document) {
throw new Error(`文档不存在: ${docId}`);
}
if (!document.extractedText) {
throw new Error(`文档未提取文本: ${document.filename}`);
}
// 调用LLM处理
const result = await processDocument({
document: { ...document, extractedText: document.extractedText! } as any,
systemPrompt,
userPromptTemplate,
modelType,
templateType,
expectedFields,
});
// 保存结果
await prisma.batchResult.create({
data: {
taskId: task.id,
documentId: docId,
status: 'success',
data: result.data,
rawOutput: result.rawOutput,
processingTimeMs: result.processingTimeMs,
tokensUsed: result.tokensUsed,
},
});
completedCount++;
console.log(`✅ [BatchService] 文档处理成功: ${document.filename} (${result.processingTimeMs}ms)`);
} catch (error: any) {
// 处理失败
console.error(`❌ [BatchService] 文档处理失败: ${docId}`, error);
await prisma.batchResult.create({
data: {
taskId: task.id,
documentId: docId,
status: 'failed',
errorMessage: error.message,
},
});
failedCount++;
}
// 推送进度
if (onProgress) {
const progress: BatchProgress = {
taskId: task.id,
completed: completedCount + failedCount,
total: documentIds.length,
failed: failedCount,
estimatedSeconds: calculateEstimatedTime(
completedCount + failedCount,
documentIds.length,
Date.now() - startTime
),
};
onProgress(progress);
}
// 更新任务进度
await prisma.batchTask.update({
where: { id: task.id },
data: {
completedCount,
failedCount,
},
});
})
);
// 等待所有任务完成
await Promise.allSettled(promises);
// 计算总时长
const durationSeconds = Math.round((Date.now() - startTime) / 1000);
// 更新任务状态
await prisma.batchTask.update({
where: { id: task.id },
data: {
status: 'completed',
completedAt: new Date(),
durationSeconds,
},
});
console.log(`🎉 [BatchService] 批处理任务完成: ${task.id}`, {
total: documentIds.length,
success: completedCount,
failed: failedCount,
durationSeconds,
});
return {
taskId: task.id,
status: 'completed',
totalDocuments: documentIds.length,
completedCount,
failedCount,
durationSeconds,
};
}
/**
* 处理单个文档
*/
async function processDocument(params: {
document: {
id: string;
filename: string;
extractedText: string;
tokensCount: number | null;
};
systemPrompt: string;
userPromptTemplate: string;
modelType: ModelType;
templateType: 'preset' | 'custom';
expectedFields: string[];
}): Promise<{
data: any;
rawOutput: string;
processingTimeMs: number;
tokensUsed?: number;
}> {
const {
document,
systemPrompt,
userPromptTemplate,
modelType,
templateType,
expectedFields,
} = params;
const startTime = Date.now();
// 构造完整的用户消息
const userMessage = `${userPromptTemplate}\n\n【文献${document.filename}\n\n${document.extractedText}`;
// 调用LLM
const adapter = LLMFactory.getAdapter(modelType);
const response = await adapter.chat(
[
{ role: 'system', content: systemPrompt },
{ role: 'user', content: userMessage },
],
{
temperature: 0.3, // 降低温度提高稳定性
maxTokens: 2000,
}
);
const processingTimeMs = Date.now() - startTime;
const rawOutput = response.content;
// 解析结果
let data: any;
if (templateType === 'preset') {
// 预设模板解析JSON
const parseResult = parseJSON(rawOutput, expectedFields);
if (!parseResult.success) {
throw new Error(`JSON解析失败: ${parseResult.error}`);
}
data = parseResult.data;
} else {
// 自定义模板:直接使用文本
data = {
extracted_text: rawOutput,
};
}
return {
data,
rawOutput,
processingTimeMs,
tokensUsed: response.usage?.totalTokens,
};
}
/**
* 计算预估剩余时间
*/
function calculateEstimatedTime(
completed: number,
total: number,
elapsedMs: number
): number {
if (completed === 0) return 0;
const avgTimePerDoc = elapsedMs / completed;
const remaining = total - completed;
return Math.round((avgTimePerDoc * remaining) / 1000);
}
/**
* 重试失败的文档
*/
export async function retryFailedDocuments(
taskId: string,
onProgress?: (progress: BatchProgress) => void
): Promise<{ retriedCount: number }> {
console.log(`🔄 [BatchService] 重试失败文档: ${taskId}`);
// 获取任务信息
const task = await prisma.batchTask.findUnique({
where: { id: taskId },
include: {
results: {
where: { status: 'failed' },
},
},
});
if (!task) {
throw new Error(`任务不存在: ${taskId}`);
}
const failedDocIds = task.results.map(r => r.documentId);
if (failedDocIds.length === 0) {
return { retriedCount: 0 };
}
// 删除旧的失败记录
await prisma.batchResult.deleteMany({
where: {
taskId,
status: 'failed',
},
});
// 重新执行
await executeBatchTask({
userId: task.userId,
kbId: task.kbId,
documentIds: failedDocIds,
templateType: task.templateType as 'preset' | 'custom',
templateId: task.templateId || undefined,
customPrompt: task.templateType === 'custom' ? task.prompt : undefined,
modelType: task.modelType as ModelType,
taskName: `${task.name} (重试)`,
onProgress,
});
return { retriedCount: failedDocIds.length };
}

View File

@@ -0,0 +1,360 @@
import { prisma } from '../../../config/database.js';
import { difyClient } from '../../../common/rag/DifyClient.js';
import { extractionClient } from '../../../common/document/ExtractionClient.js';
/**
* 文档服务
*/
/**
* 上传文档到知识库
*/
export async function uploadDocument(
userId: string,
kbId: string,
file: Buffer,
filename: string,
fileType: string,
fileSizeBytes: number,
fileUrl: string
) {
// 1. 验证知识库权限
const knowledgeBase = await prisma.knowledgeBase.findFirst({
where: {
id: kbId,
userId,
},
});
if (!knowledgeBase) {
throw new Error('Knowledge base not found or access denied');
}
// 2. 检查文档数量限制每个知识库最多50个文档
const documentCount = await prisma.document.count({
where: { kbId },
});
if (documentCount >= 50) {
throw new Error('Document limit exceeded. Maximum 50 documents per knowledge base');
}
// 3. 在数据库中创建文档记录状态uploading
const document = await prisma.document.create({
data: {
kbId,
userId,
filename,
fileType,
fileSizeBytes,
fileUrl,
difyDocumentId: '', // 暂时为空,稍后更新
status: 'uploading',
progress: 0,
},
});
try {
// 4. Phase 2: 调用提取服务提取文本内容
let extractionResult;
let extractedText = '';
let extractionMethod = '';
let extractionQuality: number | null = null;
let charCount: number | null = null;
let detectedLanguage: string | null = null;
try {
console.log(`[Phase2] 开始提取文档: ${filename}`);
extractionResult = await extractionClient.extractDocument(file, filename);
if (extractionResult.success) {
extractedText = extractionResult.text;
extractionMethod = extractionResult.method;
extractionQuality = extractionResult.quality || null;
charCount = extractionResult.metadata?.char_count || null;
detectedLanguage = extractionResult.language || null;
console.log(`[Phase2] 提取成功: method=${extractionMethod}, chars=${charCount}, language=${detectedLanguage}`);
}
} catch (extractionError) {
console.error('[Phase2] 文档提取失败但继续上传到Dify:', extractionError);
// 提取失败不影响Dify上传但记录错误
}
// 5. 上传到Dify
const difyResult = await difyClient.uploadDocumentDirectly(
knowledgeBase.difyDatasetId,
file,
filename
);
// 6. 更新文档记录更新difyDocumentId、状态和Phase 2字段
const updatedDocument = await prisma.document.update({
where: { id: document.id },
data: {
difyDocumentId: difyResult.document.id,
status: difyResult.document.indexing_status,
progress: 50,
// Phase 2新增字段
extractedText: extractedText || null,
extractionMethod: extractionMethod || null,
extractionQuality: extractionQuality,
charCount: charCount,
language: detectedLanguage,
},
});
// 7. 启动后台轮询任务,等待处理完成
pollDocumentStatus(userId, kbId, document.id, difyResult.document.id).catch(error => {
console.error('Failed to poll document status:', error);
});
// 8. 更新知识库统计
await updateKnowledgeBaseStats(kbId);
// 9. 转换BigInt为Number
return {
...updatedDocument,
fileSizeBytes: Number(updatedDocument.fileSizeBytes),
};
} catch (error) {
// 上传失败更新状态为error
await prisma.document.update({
where: { id: document.id },
data: {
status: 'error',
errorMessage: error instanceof Error ? error.message : 'Upload failed',
},
});
throw error;
}
}
/**
* 轮询文档处理状态
*/
async function pollDocumentStatus(
userId: string,
kbId: string,
documentId: string,
difyDocumentId: string,
maxAttempts: number = 30
) {
const knowledgeBase = await prisma.knowledgeBase.findFirst({
where: { id: kbId, userId },
});
if (!knowledgeBase) {
return;
}
for (let i = 0; i < maxAttempts; i++) {
await new Promise(resolve => setTimeout(resolve, 2000)); // 等待2秒
try {
// 查询Dify中的文档状态
const difyDocument = await difyClient.getDocument(
knowledgeBase.difyDatasetId,
difyDocumentId
);
// 更新数据库中的状态
await prisma.document.update({
where: { id: documentId },
data: {
status: difyDocument.indexing_status,
progress: difyDocument.indexing_status === 'completed' ? 100 : 50 + (i * 2),
segmentsCount: difyDocument.indexing_status === 'completed' ? difyDocument.word_count : null,
tokensCount: difyDocument.indexing_status === 'completed' ? difyDocument.tokens : null,
processedAt: difyDocument.indexing_status === 'completed' ? new Date() : null,
errorMessage: difyDocument.error || null,
},
});
// 如果完成或失败,退出轮询
if (difyDocument.indexing_status === 'completed') {
await updateKnowledgeBaseStats(kbId);
break;
}
if (difyDocument.indexing_status === 'error') {
break;
}
} catch (error) {
console.error(`Polling attempt ${i + 1} failed:`, error);
}
}
}
/**
* 获取文档列表
*/
export async function getDocuments(userId: string, kbId: string) {
// 1. 验证权限
const knowledgeBase = await prisma.knowledgeBase.findFirst({
where: {
id: kbId,
userId,
},
});
if (!knowledgeBase) {
throw new Error('Knowledge base not found or access denied');
}
// 2. 查询文档列表
const documents = await prisma.document.findMany({
where: { kbId },
orderBy: { uploadedAt: 'desc' },
});
// 3. 转换BigInt为Number
return documents.map(doc => ({
...doc,
fileSizeBytes: Number(doc.fileSizeBytes),
}));
}
/**
* 获取文档详情
*/
export async function getDocumentById(userId: string, documentId: string) {
const document = await prisma.document.findFirst({
where: {
id: documentId,
userId, // 确保只能访问自己的文档
},
include: {
knowledgeBase: true,
},
});
if (!document) {
throw new Error('Document not found or access denied');
}
// 转换BigInt为Number
return {
...document,
fileSizeBytes: Number(document.fileSizeBytes),
knowledgeBase: {
...document.knowledgeBase,
totalSizeBytes: Number(document.knowledgeBase.totalSizeBytes),
},
};
}
/**
* 删除文档
*/
export async function deleteDocument(userId: string, documentId: string) {
// 1. 查询文档信息
const document = await prisma.document.findFirst({
where: {
id: documentId,
userId,
},
include: {
knowledgeBase: true,
},
});
if (!document) {
throw new Error('Document not found or access denied');
}
// 2. 删除Dify中的文档
if (document.difyDocumentId) {
try {
await difyClient.deleteDocument(
document.knowledgeBase.difyDatasetId,
document.difyDocumentId
);
} catch (error) {
console.error('Failed to delete Dify document:', error);
// 继续删除本地记录
}
}
// 3. 删除数据库记录
await prisma.document.delete({
where: { id: documentId },
});
// 4. 更新知识库统计
await updateKnowledgeBaseStats(document.kbId);
}
/**
* 重新处理文档
*/
export async function reprocessDocument(userId: string, documentId: string) {
// 1. 查询文档信息
const document = await prisma.document.findFirst({
where: {
id: documentId,
userId,
},
include: {
knowledgeBase: true,
},
});
if (!document) {
throw new Error('Document not found or access denied');
}
// 2. 触发Dify重新索引
if (document.difyDocumentId) {
try {
await difyClient.updateDocument(
document.knowledgeBase.difyDatasetId,
document.difyDocumentId
);
// 3. 更新状态为processing
await prisma.document.update({
where: { id: documentId },
data: {
status: 'parsing',
progress: 0,
errorMessage: null,
},
});
// 4. 启动轮询
pollDocumentStatus(
userId,
document.kbId,
documentId,
document.difyDocumentId
).catch(error => {
console.error('Failed to poll document status:', error);
});
} catch (error) {
throw new Error('Failed to reprocess document');
}
}
}
/**
* 更新知识库统计信息
*/
async function updateKnowledgeBaseStats(kbId: string) {
const documents = await prisma.document.findMany({
where: { kbId },
});
const totalSizeBytes = documents.reduce((sum, d) => sum + Number(d.fileSizeBytes), 0);
const fileCount = documents.length;
await prisma.knowledgeBase.update({
where: { id: kbId },
data: {
fileCount,
totalSizeBytes: BigInt(totalSizeBytes),
},
});
}

View File

@@ -0,0 +1,364 @@
import { prisma } from '../../../config/database.js';
import { difyClient } from '../../../common/rag/DifyClient.js';
import { calculateDocumentTokens, selectDocumentsForFullText, TOKEN_LIMITS } from './tokenService.js';
/**
* 知识库服务
*/
/**
* 创建知识库
*/
export async function createKnowledgeBase(
userId: string,
name: string,
description?: string
) {
// 1. 检查用户知识库配额
const user = await prisma.user.findUnique({
where: { id: userId },
select: { kbQuota: true, kbUsed: true }
});
if (!user) {
throw new Error('User not found');
}
if (user.kbUsed >= user.kbQuota) {
throw new Error(`Knowledge base quota exceeded. Maximum: ${user.kbQuota}`);
}
// 2. 在Dify中创建Dataset
const difyDataset = await difyClient.createDataset({
name: `${userId}_${name}_${Date.now()}`,
description: description || `Knowledge base for user ${userId}`,
indexing_technique: 'high_quality',
});
// 3. 在数据库中创建记录
const knowledgeBase = await prisma.knowledgeBase.create({
data: {
userId,
name,
description,
difyDatasetId: difyDataset.id,
},
});
// 4. 更新用户的知识库使用计数
await prisma.user.update({
where: { id: userId },
data: {
kbUsed: { increment: 1 },
},
});
// 5. 转换BigInt为Number
return {
...knowledgeBase,
totalSizeBytes: Number(knowledgeBase.totalSizeBytes),
};
}
/**
* 获取用户的知识库列表
*/
export async function getKnowledgeBases(userId: string) {
const knowledgeBases = await prisma.knowledgeBase.findMany({
where: { userId },
orderBy: { createdAt: 'desc' },
include: {
_count: {
select: { documents: true },
},
},
});
// 转换BigInt为Number
return knowledgeBases.map(kb => ({
...kb,
totalSizeBytes: Number(kb.totalSizeBytes),
}));
}
/**
* 获取知识库详情
*/
export async function getKnowledgeBaseById(userId: string, kbId: string) {
const knowledgeBase = await prisma.knowledgeBase.findFirst({
where: {
id: kbId,
userId, // 确保只能访问自己的知识库
},
include: {
documents: {
orderBy: { uploadedAt: 'desc' },
},
_count: {
select: { documents: true },
},
},
});
if (!knowledgeBase) {
throw new Error('Knowledge base not found or access denied');
}
// 转换BigInt为Number
const result = {
...knowledgeBase,
totalSizeBytes: Number(knowledgeBase.totalSizeBytes),
documents: knowledgeBase.documents.map(doc => ({
...doc,
fileSizeBytes: Number(doc.fileSizeBytes),
})),
};
return result;
}
/**
* 更新知识库
*/
export async function updateKnowledgeBase(
userId: string,
kbId: string,
data: { name?: string; description?: string }
) {
// 1. 验证权限
const existingKb = await prisma.knowledgeBase.findFirst({
where: {
id: kbId,
userId,
},
});
if (!existingKb) {
throw new Error('Knowledge base not found or access denied');
}
// 2. 更新数据库
const knowledgeBase = await prisma.knowledgeBase.update({
where: { id: kbId },
data,
});
// 3. 转换BigInt为Number
return {
...knowledgeBase,
totalSizeBytes: Number(knowledgeBase.totalSizeBytes),
};
}
/**
* 删除知识库
*/
export async function deleteKnowledgeBase(userId: string, kbId: string) {
// 1. 验证权限
const knowledgeBase = await prisma.knowledgeBase.findFirst({
where: {
id: kbId,
userId,
},
});
if (!knowledgeBase) {
throw new Error('Knowledge base not found or access denied');
}
// 2. 删除Dify中的Dataset
try {
await difyClient.deleteDataset(knowledgeBase.difyDatasetId);
} catch (error) {
console.error('Failed to delete Dify dataset:', error);
// 继续删除本地记录即使Dify删除失败
}
// 3. 删除数据库记录会级联删除documents
await prisma.knowledgeBase.delete({
where: { id: kbId },
});
// 4. 更新用户的知识库使用计数
await prisma.user.update({
where: { id: userId },
data: {
kbUsed: { decrement: 1 },
},
});
}
/**
* 检索知识库
*/
export async function searchKnowledgeBase(
userId: string,
kbId: string,
query: string,
topK: number = 15 // Phase 1优化默认从3增加到15
) {
console.log('🔍 [searchKnowledgeBase] 开始检索', { kbId, query, topK });
// 1. 验证权限
const knowledgeBase = await prisma.knowledgeBase.findFirst({
where: {
id: kbId,
userId,
},
});
if (!knowledgeBase) {
console.error('❌ [searchKnowledgeBase] 知识库不存在', { kbId, userId });
throw new Error('Knowledge base not found or access denied');
}
console.log('📚 [searchKnowledgeBase] 找到知识库', {
id: knowledgeBase.id,
name: knowledgeBase.name,
difyDatasetId: knowledgeBase.difyDatasetId
});
// 2. 调用Dify检索API
console.log('🌐 [searchKnowledgeBase] 调用Dify检索API', {
difyDatasetId: knowledgeBase.difyDatasetId,
query,
topK
});
const results = await difyClient.retrieveKnowledge(
knowledgeBase.difyDatasetId,
query,
{
retrieval_model: {
search_method: 'semantic_search',
top_k: topK,
},
}
);
console.log('✅ [searchKnowledgeBase] Dify返回结果', {
recordCount: results.records?.length || 0,
hasRecords: results.records && results.records.length > 0
});
if (results.records && results.records.length > 0) {
console.log('📄 [searchKnowledgeBase] 检索到的记录:',
results.records.map((r: any) => ({
score: r.score,
contentPreview: r.segment?.content?.substring(0, 100)
}))
);
} else {
console.warn('⚠️ [searchKnowledgeBase] 没有检索到任何记录');
}
return results;
}
/**
* 获取知识库统计信息
*/
export async function getKnowledgeBaseStats(userId: string, kbId: string) {
// 1. 验证权限
const knowledgeBase = await prisma.knowledgeBase.findFirst({
where: {
id: kbId,
userId,
},
include: {
documents: true,
},
});
if (!knowledgeBase) {
throw new Error('Knowledge base not found or access denied');
}
// 2. 统计信息
const stats = {
totalDocuments: knowledgeBase.documents.length,
completedDocuments: knowledgeBase.documents.filter(d => d.status === 'completed').length,
processingDocuments: knowledgeBase.documents.filter(d =>
['uploading', 'parsing', 'indexing'].includes(d.status)
).length,
errorDocuments: knowledgeBase.documents.filter(d => d.status === 'error').length,
totalSizeBytes: knowledgeBase.totalSizeBytes,
totalTokens: knowledgeBase.documents.reduce((sum, d) => sum + (d.tokensCount || 0), 0),
};
return stats;
}
/**
* 获取知识库文档选择(用于全文阅读模式)
* Phase 2新增根据Token限制选择文档
*/
export async function getDocumentSelection(
userId: string,
kbId: string,
maxFiles?: number,
maxTokens?: number
) {
// 1. 验证权限
const knowledgeBase = await prisma.knowledgeBase.findFirst({
where: { id: kbId, userId },
include: {
documents: {
where: {
status: 'completed', // 只选择已完成的文档
},
select: {
id: true,
filename: true,
extractedText: true,
charCount: true,
extractionMethod: true,
tokensCount: true,
fileSizeBytes: true,
},
orderBy: { uploadedAt: 'desc' },
},
},
});
if (!knowledgeBase) {
throw new Error('Knowledge base not found or access denied');
}
// 2. 计算每个文档的Token数
const documentTokens = calculateDocumentTokens(knowledgeBase.documents);
// 3. 选择文档根据Token限制
const selection = selectDocumentsForFullText(
documentTokens,
maxFiles || TOKEN_LIMITS.MAX_FILES,
maxTokens || TOKEN_LIMITS.MAX_TOTAL_TOKENS
);
// 4. 返回结果
return {
knowledgeBaseId: kbId,
knowledgeBaseName: knowledgeBase.name,
limits: {
maxFiles: maxFiles || TOKEN_LIMITS.MAX_FILES,
maxTokens: maxTokens || TOKEN_LIMITS.MAX_TOTAL_TOKENS,
},
selection: {
selectedCount: selection.totalFiles,
selectedTokens: selection.totalTokens,
excludedCount: selection.excludedDocuments.length,
availableTokens: selection.availableTokens,
reason: selection.reason,
},
selectedDocuments: selection.selectedDocuments.map(doc => ({
...doc,
// 查找原始文档信息
...knowledgeBase.documents.find(d => d.id === doc.documentId),
})),
excludedDocuments: selection.excludedDocuments.map(doc => ({
...doc,
// 查找原始文档信息
...knowledgeBase.documents.find(d => d.id === doc.documentId),
})),
};
}

View File

@@ -0,0 +1,232 @@
import { encoding_for_model, Tiktoken } from 'tiktoken';
/**
* Token计数服务
* 用于全文阅读模式的Token管理
*/
// Token限制配置
export const TOKEN_LIMITS = {
MAX_FILES: 50, // 最多50个文件
MAX_TOTAL_TOKENS: 980000, // 最多980K tokens为Qwen-Long 1M上下文留20K余量
CONTEXT_RESERVE: 20000, // 预留给系统提示词和用户查询的token
};
// 缓存编码器
let encoderCache: Tiktoken | null = null;
/**
* 获取编码器使用gpt-4作为Qwen的替代
*/
function getEncoder(): Tiktoken {
if (!encoderCache) {
// Qwen使用类似GPT-4的tokenizer
encoderCache = encoding_for_model('gpt-4');
}
return encoderCache;
}
/**
* 计算文本的Token数
*/
export function countTokens(text: string): number {
if (!text || text.trim().length === 0) {
return 0;
}
try {
const encoder = getEncoder();
const tokens = encoder.encode(text);
return tokens.length;
} catch (error) {
console.error('[TokenService] Failed to count tokens:', error);
// 降级粗略估算中文约1.5字符/token英文约4字符/token
const chineseChars = (text.match(/[\u4e00-\u9fff]/g) || []).length;
const totalChars = text.length;
const englishChars = totalChars - chineseChars;
return Math.ceil(chineseChars / 1.5 + englishChars / 4);
}
}
/**
* 批量计算多个文本的Token数
*/
export function countTokensBatch(texts: string[]): number[] {
return texts.map(text => countTokens(text));
}
/**
* 计算文档Token数基于提取的文本
*/
export interface DocumentTokenInfo {
documentId: string;
filename: string;
charCount: number;
estimatedTokens: number;
extractionMethod?: string;
}
/**
* 为文档列表计算Token数
*/
export function calculateDocumentTokens(
documents: Array<{
id: string;
filename: string;
extractedText?: string | null;
charCount?: number | null;
extractionMethod?: string | null;
}>
): DocumentTokenInfo[] {
return documents.map(doc => {
let estimatedTokens = 0;
if (doc.extractedText) {
// 使用提取的文本计算精确token数
estimatedTokens = countTokens(doc.extractedText);
} else if (doc.charCount) {
// 如果没有提取文本,使用字符数估算
// 假设中英文混合平均2.5字符/token
estimatedTokens = Math.ceil(doc.charCount / 2.5);
}
return {
documentId: doc.id,
filename: doc.filename,
charCount: doc.charCount || 0,
estimatedTokens,
extractionMethod: doc.extractionMethod || undefined,
};
});
}
/**
* 选择文档以满足Token限制
* 策略优先选择Token数少的文档直到达到限制
*/
export interface DocumentSelectionResult {
selectedDocuments: DocumentTokenInfo[];
totalTokens: number;
totalFiles: number;
excludedDocuments: DocumentTokenInfo[];
reason: 'all_included' | 'file_limit' | 'token_limit';
availableTokens: number;
}
export function selectDocumentsForFullText(
documents: DocumentTokenInfo[],
maxFiles: number = TOKEN_LIMITS.MAX_FILES,
maxTokens: number = TOKEN_LIMITS.MAX_TOTAL_TOKENS
): DocumentSelectionResult {
// 按Token数升序排序优先选择小文件
const sortedDocs = [...documents].sort(
(a, b) => a.estimatedTokens - b.estimatedTokens
);
const selected: DocumentTokenInfo[] = [];
const excluded: DocumentTokenInfo[] = [];
let totalTokens = 0;
for (const doc of sortedDocs) {
// 检查文件数限制
if (selected.length >= maxFiles) {
excluded.push(doc);
continue;
}
// 检查Token限制
if (totalTokens + doc.estimatedTokens > maxTokens) {
excluded.push(doc);
continue;
}
// 添加到选中列表
selected.push(doc);
totalTokens += doc.estimatedTokens;
}
// 判断限制原因
let reason: 'all_included' | 'file_limit' | 'token_limit' = 'all_included';
if (excluded.length > 0) {
if (selected.length >= maxFiles) {
reason = 'file_limit';
} else {
reason = 'token_limit';
}
}
return {
selectedDocuments: selected,
totalTokens,
totalFiles: selected.length,
excludedDocuments: excluded,
reason,
availableTokens: maxTokens - totalTokens,
};
}
/**
* 估算查询需要的Token数
*/
export function estimateQueryTokens(query: string, systemPrompt?: string): number {
let total = countTokens(query);
if (systemPrompt) {
total += countTokens(systemPrompt);
}
// 为响应预留空间
total += 2000; // 假设响应最多2000 tokens
return total;
}
/**
* 检查是否超过Token限制
*/
export function checkTokenLimit(
documentsTokens: number,
queryTokens: number,
maxTokens: number = TOKEN_LIMITS.MAX_TOTAL_TOKENS
): {
withinLimit: boolean;
totalTokens: number;
maxTokens: number;
remaining: number;
} {
const totalTokens = documentsTokens + queryTokens;
const remaining = maxTokens - totalTokens;
return {
withinLimit: remaining >= 0,
totalTokens,
maxTokens,
remaining,
};
}
/**
* 释放编码器(清理资源)
*/
export function cleanup() {
if (encoderCache) {
encoderCache.free();
encoderCache = null;
}
}
// 进程退出时清理
if (typeof process !== 'undefined') {
process.on('exit', cleanup);
process.on('SIGINT', () => {
cleanup();
process.exit();
});
}

View File

@@ -0,0 +1,152 @@
/**
* Phase 3: 批处理模式 - 临床研究信息提取模板
*
* 提取临床研究的8个核心字段
* 1. 研究目的
* 2. 研究设计
* 3. 研究对象
* 4. 样本量text类型保留原文描述
* 5. 干预组
* 6. 对照组
* 7. 结果及数据
* 8. 牛津评级(提供详细标准)
*/
export interface TemplateField {
key: string;
label: string;
type: 'text' | 'longtext' | 'number';
description?: string;
}
export interface BatchTemplate {
id: string;
name: string;
description: string;
outputFields: TemplateField[];
systemPrompt: string;
userPrompt: string;
}
export const CLINICAL_RESEARCH_TEMPLATE: BatchTemplate = {
id: 'clinical_research',
name: '临床研究信息提取',
description: '提取研究目的、设计、对象、样本量、干预、对照、结果、证据等级',
outputFields: [
{
key: 'research_purpose',
label: '研究目的',
type: 'text',
description: '研究想要解决的问题或验证的假设'
},
{
key: 'research_design',
label: '研究设计',
type: 'text',
description: '研究类型RCT、队列研究等'
},
{
key: 'research_subjects',
label: '研究对象',
type: 'text',
description: '纳入/排除标准、人群特征'
},
{
key: 'sample_size',
label: '样本量',
type: 'text', // ✅ text类型保留原文描述
description: '实际纳入的受试者人数'
},
{
key: 'intervention_group',
label: '干预组',
type: 'text',
description: '实验组的干预措施'
},
{
key: 'control_group',
label: '对照组',
type: 'text',
description: '对照组的情况'
},
{
key: 'results_data',
label: '结果及数据',
type: 'longtext',
description: '主要结局指标的具体数据'
},
{
key: 'oxford_level',
label: '牛津评级',
type: 'text',
description: '证据等级(1a-5)'
},
],
systemPrompt: `你是一个专业的临床研究数据提取助手。
你的任务是从临床研究文献中提取结构化信息。
你的回答必须严格遵循JSON格式不要有任何额外的文字说明。`,
userPrompt: `请仔细阅读这篇临床研究文献,提取以下信息:
1. **研究目的**本研究想要解决什么问题或验证什么假设用1-2句话概括。
2. **研究设计**:研究类型,如随机对照试验(RCT)、队列研究、病例对照研究、横断面研究、系统评价/Meta分析等。
3. **研究对象**:描述纳入标准、排除标准、人群特征(年龄、性别、疾病状态等)。
4. **样本量**:实际纳入的受试者人数,保留原文描述(如"干预组156人对照组152人共308人")。
5. **干预组**:实验组接受的治疗或干预措施,包括药物名称、剂量、给药方式、疗程等。
6. **对照组**:对照组的情况,如安慰剂、标准治疗、空白对照等。
7. **结果及数据**主要结局指标的具体数据、统计结果、P值、置信区间等。包括基线数据对比和终点数据对比。
8. **牛津评级**:根据研究设计判断证据等级,参考以下标准:
- **1a**:系统评价/Meta分析多个RCT的汇总分析
- **1b**:单个随机对照试验(RCT)
- **2a**:设计良好的对照研究(无随机化)
- **2b**:设计良好的准实验研究(队列研究、病例对照研究)
- **3a**:描述性研究(横断面研究、病例系列)
- **3b**:个案报告(单一病例)
- **4**:专家意见、共识声明
- **5**:基础研究(动物实验、体外研究)
请严格按照以下JSON格式输出不要有任何额外说明或前言
{
"research_purpose": "...",
"research_design": "...",
"research_subjects": "...",
"sample_size": "...",
"intervention_group": "...",
"control_group": "...",
"results_data": "...",
"oxford_level": "..."
}`,
};
// 导出所有预设模板
export const PRESET_TEMPLATES: Record<string, BatchTemplate> = {
[CLINICAL_RESEARCH_TEMPLATE.id]: CLINICAL_RESEARCH_TEMPLATE,
};
// 获取模板
export function getTemplate(templateId: string): BatchTemplate | null {
return PRESET_TEMPLATES[templateId] || null;
}
// 获取所有模板列表
export function getAllTemplates(): BatchTemplate[] {
return Object.values(PRESET_TEMPLATES);
}

View File

@@ -402,6 +402,10 @@ SET session_replication_role = 'origin';

View File

@@ -104,6 +104,10 @@ WHERE key = 'verify_test';

View File

@@ -247,6 +247,10 @@ verifyDatabase()

View File

@@ -37,6 +37,10 @@ export {}