Files
AIclinicalresearch/backend/src/modules/pkb/controllers/documentController.ts
HaHafeng 61cdc97eeb feat(platform): Fix pg-boss queue conflict and add safety standards
Summary:
- Fix pg-boss queue conflict (duplicate key violation on queue_pkey)
- Add global error listener to prevent process crash
- Reduce connection pool from 10 to 4
- Add graceful shutdown handling (SIGTERM/SIGINT)
- Fix researchWorker recursive call bug in catch block
- Make screeningWorker idempotent using upsert

Security Standards (v1.1):
- Prohibit recursive retry in Worker catch blocks
- Prohibit payload bloat (only store fileKey/ID in job.data)
- Require Worker idempotency (upsert + unique constraint)
- Recommend task-specific expireInSeconds settings
- Document graceful shutdown pattern

New Features:
- PKB signed URL endpoint for document preview/download
- pg_bigm installation guide for Docker
- Dockerfile.postgres-with-extensions for pgvector + pg_bigm

Documentation:
- Update Postgres-Only async task processing guide (v1.1)
- Add troubleshooting SQL queries
- Update safety checklist

Tested: Local verification passed
2026-01-23 22:07:26 +08:00

469 lines
12 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import type { FastifyRequest, FastifyReply } from 'fastify';
import * as documentService from '../services/documentService.js';
import { storage } from '../../../common/storage/index.js';
import { OSSAdapter } from '../../../common/storage/OSSAdapter.js';
import { randomUUID } from 'crypto';
import path from 'path';
import { logger } from '../../../common/logging/index.js';
/**
 * Resolve the authenticated user's ID from the request.
 *
 * Reads `request.user.userId` (original note: the value comes from the JWT
 * token). The `user` property is read through an `any` cast; its full shape
 * is not visible in this file.
 *
 * @param request - Incoming Fastify request.
 * @returns The `userId` found on `request.user`.
 * @throws Error with message `User not authenticated` when `request.user` is
 *   missing or its `userId` is falsy.
 */
function getUserId(request: FastifyRequest): string {
  const principal = (request as any).user;
  const id = principal?.userId;
  if (id) {
    return id;
  }
  throw new Error('User not authenticated');
}
/**
 * Resolve the tenant ID from the request.
 *
 * Reads `request.user.tenantId` (original note: the value comes from the JWT
 * token). Any falsy value — missing user, missing field, empty string —
 * falls back to the literal `'default'`.
 *
 * @param request - Incoming Fastify request.
 * @returns The tenant ID, or `'default'` when none is present.
 */
function getTenantId(request: FastifyRequest): string {
  const principal = (request as any).user;
  const tenantFromToken = principal?.tenantId;
  if (tenantFromToken) {
    return tenantFromToken;
  }
  // No tenant on the request: use the default tenant.
  return 'default';
}
/**
 * Build the storage key for a PKB document.
 *
 * Format: `tenants/{tenantId}/users/{userId}/pkb/{kbId}/{id}{ext}` where
 * `id` is the first 16 hex characters of a random UUID (dashes removed) and
 * `ext` is the lower-cased extension of `filename`, including the leading
 * dot, or empty when the filename has no extension.
 *
 * @param tenantId - Tenant segment of the key.
 * @param userId - User segment of the key.
 * @param kbId - Knowledge-base segment of the key.
 * @param filename - Original file name; only its extension is used.
 * @returns The generated storage key.
 */
function generatePkbStorageKey(
  tenantId: string,
  userId: string,
  kbId: string,
  filename: string
): string {
  const shortId = randomUUID().split('-').join('').slice(0, 16);
  const extension = path.extname(filename).toLowerCase();
  const objectName = shortId + extension;
  return ['tenants', tenantId, 'users', userId, 'pkb', kbId, objectName].join('/');
}
/**
 * Upload a document into a knowledge base.
 *
 * Flow: read the multipart file from the request, validate its size (30MB
 * cap) and MIME type, upload the bytes to the storage service under a
 * generated key, then pass the file and its storage location to
 * `documentService.uploadDocument`.
 *
 * Responses:
 * - 201 `{ success: true, data }` on success.
 * - 400 when no file was sent, the file exceeds 30MB, the MIME type is not
 *   allowed, or the error message contains `limit exceeded`.
 * - 404 when the error message contains `not found` or `access denied`.
 * - 413 when the multipart parser itself rejects the file as too large.
 * - 500 when the storage upload fails, or for any other error.
 *
 * @param request - Fastify request; `params.kbId` is the target knowledge base.
 * @param reply - Fastify reply used to send the JSON response.
 */
export async function uploadDocument(
  request: FastifyRequest<{
    Params: {
      kbId: string;
    };
  }>,
  reply: FastifyReply
) {
  // File size limit: 30MB (original note: per the OSS convention).
  const maxSize = 30 * 1024 * 1024;
  // Accepted MIME types: PDF, DOC, DOCX, TXT, MD.
  const allowedTypes = [
    'application/pdf',
    'application/msword',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'text/plain',
    'text/markdown',
  ];

  try {
    const { kbId } = request.params;
    console.log(`📤 开始上传文档到知识库: ${kbId}`);

    // Fetch the uploaded file from the multipart request.
    const data = await request.file();
    if (!data) {
      console.error('❌ 没有接收到文件');
      return reply.status(400).send({
        success: false,
        message: 'No file uploaded',
      });
    }
    console.log(`📄 接收到文件: ${data.filename}, 类型: ${data.mimetype}`);

    const file = await data.toBuffer();
    const filename = data.filename;
    const fileType = data.mimetype;
    const fileSizeBytes = file.length;

    console.log(`📊 文件大小: ${(fileSizeBytes / 1024 / 1024).toFixed(2)}MB (限制: 30MB)`);
    if (fileSizeBytes > maxSize) {
      console.error(`❌ 文件太大: ${(fileSizeBytes / 1024 / 1024).toFixed(2)}MB`);
      return reply.status(400).send({
        success: false,
        message: 'File size exceeds 30MB limit',
      });
    }

    // The MIME type is the one reported with the multipart part.
    console.log(`🔍 检查文件类型: ${fileType}`);
    if (!allowedTypes.includes(fileType)) {
      console.error(`❌ 不支持的文件类型: ${fileType}`);
      return reply.status(400).send({
        success: false,
        message: 'File type not supported. Allowed: PDF, DOC, DOCX, TXT, MD',
      });
    }

    // Resolve the caller's identity.
    const userId = getUserId(request);
    const tenantId = getTenantId(request);

    // Generate the storage key (it embeds kbId).
    const storageKey = generatePkbStorageKey(tenantId, userId, kbId, filename);
    console.log(`📦 OSS 存储路径: ${storageKey}`);

    // Upload the bytes to the storage service.
    console.log(`☁️ 上传文件到存储服务...`);
    let fileUrl = '';
    try {
      fileUrl = await storage.upload(storageKey, file);
      console.log(`✅ 文件已上传到存储服务`);
    } catch (storageError) {
      console.error(`❌ 存储服务上传失败:`, storageError);
      return reply.status(500).send({
        success: false,
        message: 'Failed to upload file to storage',
      });
    }

    // NOTE(review): if the service call below fails (e.g. knowledge base not
    // found), the object uploaded above stays in storage. Consider cleaning
    // it up once the storage adapter's delete API is confirmed.
    console.log(`⚙️ 调用文档服务处理文件...`);
    const document = await documentService.uploadDocument(
      userId,
      kbId,
      file,
      filename,
      fileType,
      fileSizeBytes,
      fileUrl,
      storageKey // storage path of the uploaded object
    );
    console.log(`✅ 文档上传成功: ${document.id}`);

    return reply.status(201).send({
      success: true,
      data: document,
    });
  } catch (error: unknown) {
    // A rejection is not guaranteed to be an Error: read `message` and `code`
    // defensively so this handler never throws while handling a failure.
    const details = error as { message?: unknown; code?: unknown } | null | undefined;
    const message = typeof details?.message === 'string' ? details.message : '';
    console.error('❌ 文档上传失败:', message);
    console.error('错误详情:', error);

    // NOTE(review): `FST_REQ_FILE_TOO_LARGE` is the code @fastify/multipart
    // documents for files above its configured `limits.fileSize` — confirm
    // that is the multipart plugin registered for this route.
    if (details?.code === 'FST_REQ_FILE_TOO_LARGE') {
      return reply.status(413).send({
        success: false,
        message: message || 'File too large',
      });
    }
    if (message.includes('not found') || message.includes('access denied')) {
      return reply.status(404).send({
        success: false,
        message,
      });
    }
    if (message.includes('limit exceeded')) {
      return reply.status(400).send({
        success: false,
        message,
      });
    }
    return reply.status(500).send({
      success: false,
      message: message || 'Failed to upload document',
    });
  }
}
/**
 * List the documents of a knowledge base for the authenticated user.
 *
 * Responses:
 * - 200 `{ success: true, data }` with the result of
 *   `documentService.getDocuments(userId, kbId)`.
 * - 404 when the error message contains `not found`.
 * - 500 for any other error.
 *
 * @param request - Fastify request; `params.kbId` identifies the knowledge base.
 * @param reply - Fastify reply used to send the JSON response.
 */
export async function getDocuments(
  request: FastifyRequest<{
    Params: {
      kbId: string;
    };
  }>,
  reply: FastifyReply
) {
  try {
    const { kbId } = request.params;
    const userId = getUserId(request);
    const documents = await documentService.getDocuments(userId, kbId);
    return reply.send({
      success: true,
      data: documents,
    });
  } catch (error: unknown) {
    console.error('Failed to get documents:', error);
    // A rejection is not guaranteed to be an Error; read `message` defensively
    // so the handler always replies instead of throwing from the catch block.
    const rawMessage = (error as { message?: unknown } | null | undefined)?.message;
    const message = typeof rawMessage === 'string' ? rawMessage : '';
    if (message.includes('not found')) {
      return reply.status(404).send({
        success: false,
        message,
      });
    }
    return reply.status(500).send({
      success: false,
      message: message || 'Failed to get documents',
    });
  }
}
/**
 * Get the details of a single document for the authenticated user.
 *
 * Responses:
 * - 200 `{ success: true, data }` with the result of
 *   `documentService.getDocumentById(userId, id)`.
 * - 404 when the error message contains `not found`.
 * - 500 for any other error.
 *
 * @param request - Fastify request; `params.id` is the document ID.
 * @param reply - Fastify reply used to send the JSON response.
 */
export async function getDocumentById(
  request: FastifyRequest<{
    Params: {
      id: string;
    };
  }>,
  reply: FastifyReply
) {
  try {
    const { id } = request.params;
    const userId = getUserId(request);
    const document = await documentService.getDocumentById(userId, id);
    return reply.send({
      success: true,
      data: document,
    });
  } catch (error: unknown) {
    console.error('Failed to get document:', error);
    // A rejection is not guaranteed to be an Error; read `message` defensively
    // so the handler always replies instead of throwing from the catch block.
    const rawMessage = (error as { message?: unknown } | null | undefined)?.message;
    const message = typeof rawMessage === 'string' ? rawMessage : '';
    if (message.includes('not found')) {
      return reply.status(404).send({
        success: false,
        message,
      });
    }
    return reply.status(500).send({
      success: false,
      message: message || 'Failed to get document',
    });
  }
}
/**
 * Delete a document on behalf of the authenticated user.
 *
 * Responses:
 * - 200 `{ success: true, message: 'Document deleted successfully' }` after
 *   `documentService.deleteDocument(userId, id)` resolves.
 * - 404 when the error message contains `not found`.
 * - 500 for any other error.
 *
 * @param request - Fastify request; `params.id` is the document ID.
 * @param reply - Fastify reply used to send the JSON response.
 */
export async function deleteDocument(
  request: FastifyRequest<{
    Params: {
      id: string;
    };
  }>,
  reply: FastifyReply
) {
  try {
    const { id } = request.params;
    const userId = getUserId(request);
    await documentService.deleteDocument(userId, id);
    return reply.send({
      success: true,
      message: 'Document deleted successfully',
    });
  } catch (error: unknown) {
    console.error('Failed to delete document:', error);
    // A rejection is not guaranteed to be an Error; read `message` defensively
    // so the handler always replies instead of throwing from the catch block.
    const rawMessage = (error as { message?: unknown } | null | undefined)?.message;
    const message = typeof rawMessage === 'string' ? rawMessage : '';
    if (message.includes('not found')) {
      return reply.status(404).send({
        success: false,
        message,
      });
    }
    return reply.status(500).send({
      success: false,
      message: message || 'Failed to delete document',
    });
  }
}
/**
 * Ask the document service to reprocess a document for the authenticated user.
 *
 * Responses:
 * - 200 `{ success: true, message: 'Document reprocessing started' }` after
 *   `documentService.reprocessDocument(userId, id)` resolves.
 * - 404 when the error message contains `not found`.
 * - 500 for any other error.
 *
 * @param request - Fastify request; `params.id` is the document ID.
 * @param reply - Fastify reply used to send the JSON response.
 */
export async function reprocessDocument(
  request: FastifyRequest<{
    Params: {
      id: string;
    };
  }>,
  reply: FastifyReply
) {
  try {
    const { id } = request.params;
    const userId = getUserId(request);
    await documentService.reprocessDocument(userId, id);
    return reply.send({
      success: true,
      message: 'Document reprocessing started',
    });
  } catch (error: unknown) {
    console.error('Failed to reprocess document:', error);
    // A rejection is not guaranteed to be an Error; read `message` defensively
    // so the handler always replies instead of throwing from the catch block.
    const rawMessage = (error as { message?: unknown } | null | undefined)?.message;
    const message = typeof rawMessage === 'string' ? rawMessage : '';
    if (message.includes('not found')) {
      return reply.status(404).send({
        success: false,
        message,
      });
    }
    return reply.status(500).send({
      success: false,
      message: message || 'Failed to reprocess document',
    });
  }
}
/**
 * Phase 2: get a document's full text (original note: used by the
 * per-document close-reading mode).
 *
 * Loads the document through `documentService.getDocumentById(userId, id)`
 * and returns a flattened view of it. Optional fields are emitted as `null`
 * when their value is falsy.
 *
 * Responses:
 * - 200 `{ success: true, data }` with identifiers, extraction fields and a
 *   `metadata` object (`uploadedAt`, `processedAt`, `status`).
 * - 404 when the error message contains `not found`.
 * - 500 for any other error.
 *
 * @param request - Fastify request; `params.id` is the document ID.
 * @param reply - Fastify reply used to send the JSON response.
 */
export async function getDocumentFullText(
  request: FastifyRequest<{
    Params: {
      id: string;
    };
  }>,
  reply: FastifyReply
) {
  try {
    const { id } = request.params;
    const userId = getUserId(request);
    const document = await documentService.getDocumentById(userId, id);
    // Return the complete document information.
    return reply.send({
      success: true,
      data: {
        documentId: document.id,
        filename: document.filename,
        fileType: document.fileType,
        fileSizeBytes: document.fileSizeBytes,
        // These fields are read through an `any` cast because they are not
        // on the type returned by the service as seen from this file.
        extractedText: (document as any).extractedText || null,
        charCount: (document as any).charCount || null,
        tokensCount: document.tokensCount || null,
        extractionMethod: (document as any).extractionMethod || null,
        extractionQuality: (document as any).extractionQuality || null,
        language: (document as any).language || null,
        metadata: {
          uploadedAt: document.uploadedAt,
          processedAt: document.processedAt,
          status: document.status,
        },
      },
    });
  } catch (error: unknown) {
    console.error('Failed to get document full text:', error);
    // A rejection is not guaranteed to be an Error; read `message` defensively
    // so the handler always replies instead of throwing from the catch block.
    const rawMessage = (error as { message?: unknown } | null | undefined)?.message;
    const message = typeof rawMessage === 'string' ? rawMessage : '';
    if (message.includes('not found')) {
      return reply.status(404).send({
        success: false,
        message,
      });
    }
    return reply.status(500).send({
      success: false,
      message: message || 'Failed to get document full text',
    });
  }
}
/**
 * Get a URL for a document, for front-end preview or download.
 *
 * When the storage backend is an `OSSAdapter`, a signed URL with an expiry
 * is generated; with `download=true` the original filename is passed to the
 * signer as well. For any other backend, `storage.getUrl` is used.
 *
 * Query parameters:
 * - `expires`: requested lifetime in seconds. Missing, non-numeric, zero or
 *   negative values fall back to 3600; values above 7 days are clamped.
 * - `download`: the literal string `'true'` requests an attachment-style URL;
 *   anything else means preview.
 *
 * Responses:
 * - 200 `{ success: true, data: { documentId, filename, fileType, url, expiresIn } }`.
 * - 404 when the document has no `storageKey`, or the error message contains
 *   `not found` or `access denied`.
 * - 500 for any other error.
 *
 * @param request - Fastify request; `params.id` is the document ID.
 * @param reply - Fastify reply used to send the JSON response.
 */
export async function getDocumentSignedUrl(
  request: FastifyRequest<{
    Params: {
      id: string;
    };
    Querystring: {
      /** Lifetime in seconds; defaults to 3600. */
      expires?: string;
      /** Whether to download as an attachment; defaults to false. */
      download?: string;
    };
  }>,
  reply: FastifyReply
) {
  const defaultExpiresSeconds = 3600;
  // Upper bound on the caller-supplied lifetime so a request cannot mint an
  // effectively permanent link. NOTE(review): 7 days is a conservative
  // choice — align it with the storage provider's limit and product policy.
  const maxExpiresSeconds = 7 * 24 * 60 * 60;

  try {
    const { id } = request.params;

    // `expires` is untrusted query input: reject NaN and non-positive values,
    // then clamp, before it reaches the signer.
    const requestedExpires = Number.parseInt(request.query.expires || '', 10);
    const expires =
      Number.isFinite(requestedExpires) && requestedExpires > 0
        ? Math.min(requestedExpires, maxExpiresSeconds)
        : defaultExpiresSeconds;
    const download = request.query.download === 'true';

    const userId = getUserId(request);

    // Load the document record.
    const document = await documentService.getDocumentById(userId, id);

    // Records without a storage key cannot be served (original note: these
    // may be legacy data).
    if (!document.storageKey) {
      logger.warn('[PKB] 文档没有存储路径,可能是旧数据', { documentId: id });
      return reply.status(404).send({
        success: false,
        message: '文档文件不可用,请重新上传',
      });
    }

    // Build the URL according to the storage adapter in use.
    let signedUrl: string;
    if (storage instanceof OSSAdapter) {
      // OSS: signed URL, carrying the original filename for downloads.
      signedUrl = download
        ? storage.getSignedUrl(document.storageKey, expires, document.filename)
        : storage.getSignedUrl(document.storageKey, expires);
    } else {
      // Other storage (original note: local storage): plain URL.
      signedUrl = storage.getUrl(document.storageKey);
    }

    logger.info('[PKB] 生成签名URL', {
      documentId: id,
      filename: document.filename,
      expires,
      download,
    });

    return reply.send({
      success: true,
      data: {
        documentId: document.id,
        filename: document.filename,
        fileType: document.fileType,
        url: signedUrl,
        expiresIn: expires,
      },
    });
  } catch (error: unknown) {
    // A rejection is not guaranteed to be an Error; read `message` defensively
    // so the handler always replies instead of throwing from the catch block.
    const rawMessage = (error as { message?: unknown } | null | undefined)?.message;
    const message = typeof rawMessage === 'string' ? rawMessage : '';
    logger.error('[PKB] 获取签名URL失败', { error: message });
    if (message.includes('not found') || message.includes('access denied')) {
      return reply.status(404).send({
        success: false,
        message,
      });
    }
    return reply.status(500).send({
      success: false,
      message: message || 'Failed to get signed URL',
    });
  }
}