Summary: - Fix pg-boss queue conflict (duplicate key violation on queue_pkey) - Add global error listener to prevent process crash - Reduce connection pool from 10 to 4 - Add graceful shutdown handling (SIGTERM/SIGINT) - Fix researchWorker recursive call bug in catch block - Make screeningWorker idempotent using upsert Security Standards (v1.1): - Prohibit recursive retry in Worker catch blocks - Prohibit payload bloat (only store fileKey/ID in job.data) - Require Worker idempotency (upsert + unique constraint) - Recommend task-specific expireInSeconds settings - Document graceful shutdown pattern New Features: - PKB signed URL endpoint for document preview/download - pg_bigm installation guide for Docker - Dockerfile.postgres-with-extensions for pgvector + pg_bigm Documentation: - Update Postgres-Only async task processing guide (v1.1) - Add troubleshooting SQL queries - Update safety checklist Tested: Local verification passed
469 lines
12 KiB
TypeScript
469 lines
12 KiB
TypeScript
import type { FastifyRequest, FastifyReply } from 'fastify';
|
||
import * as documentService from '../services/documentService.js';
|
||
import { storage } from '../../../common/storage/index.js';
|
||
import { OSSAdapter } from '../../../common/storage/OSSAdapter.js';
|
||
import { randomUUID } from 'crypto';
|
||
import path from 'path';
|
||
import { logger } from '../../../common/logging/index.js';
|
||
|
||
/**
 * Resolve the current user's ID from the `user` object attached to the request
 * (populated from the JWT token).
 *
 * @param request - Incoming Fastify request.
 * @returns The `userId` found on `request.user`.
 * @throws Error with message `User not authenticated` when `request.user` is
 *         absent or carries no truthy `userId`.
 */
function getUserId(request: FastifyRequest): string {
  const authenticatedUser = (request as any).user;
  const id = authenticatedUser?.userId;

  if (id) {
    return id;
  }

  throw new Error('User not authenticated');
}
|
||
|
||
/**
 * Resolve the current tenant's ID from the `user` object attached to the request
 * (populated from the JWT token).
 *
 * @param request - Incoming Fastify request.
 * @returns The `tenantId` found on `request.user`, or `'default'` when it is
 *          missing or otherwise falsy.
 */
function getTenantId(request: FastifyRequest): string {
  const claims = (request as any).user;
  const tenant = claims?.tenantId;

  // No tenant on the token: fall back to the default tenant.
  return tenant ? tenant : 'default';
}
|
||
|
||
/**
 * Build the storage key for a PKB document.
 *
 * Format: `tenants/{tenantId}/users/{userId}/pkb/{kbId}/{uuid}.{ext}`
 *
 * The object name is the first 16 hex characters of a random UUID (dashes
 * removed) followed by the lower-cased extension of `filename`; the original
 * base name is not part of the key.
 *
 * @param tenantId - Tenant path segment.
 * @param userId - User path segment.
 * @param kbId - Knowledge base path segment.
 * @param filename - Original file name; only its extension is used.
 * @returns The generated storage key.
 */
function generatePkbStorageKey(
  tenantId: string,
  userId: string,
  kbId: string,
  filename: string
): string {
  const extension = path.extname(filename).toLowerCase();
  const shortId = randomUUID().split('-').join('').slice(0, 16);

  const directory = `tenants/${tenantId}/users/${userId}/pkb/${kbId}`;
  const objectName = `${shortId}${extension}`;

  return `${directory}/${objectName}`;
}
|
||
|
||
/**
 * Upload a document into a knowledge base.
 *
 * Flow: resolve the caller, read the multipart file into memory, validate its
 * size (max 30MB) and MIME type, write it to the storage service under a
 * generated key, then hand the file and its storage location to
 * `documentService.uploadDocument`.
 *
 * Responses:
 * - 201 `{ success: true, data }` with the value returned by the document service.
 * - 400 when no file was sent, the file exceeds 30MB, the MIME type is not
 *   allowed, or the error message contains `limit exceeded`.
 * - 404 when the error message contains `not found` or `access denied`.
 * - 500 when the storage upload fails or on any other error.
 *
 * @param request - Fastify request; `params.kbId` is the target knowledge base.
 * @param reply - Fastify reply used to send the response.
 */
export async function uploadDocument(
  request: FastifyRequest<{
    Params: {
      kbId: string;
    };
  }>,
  reply: FastifyReply
) {
  try {
    const { kbId } = request.params;
    console.log(`📤 开始上传文档到知识库: ${kbId}`);

    // Resolve the caller first so an unauthenticated request fails before the
    // file (up to 30MB) is buffered into memory below.
    const userId = getUserId(request);
    const tenantId = getTenantId(request);

    // Read the uploaded file part.
    const data = await request.file();

    if (!data) {
      console.error('❌ 没有接收到文件');
      return reply.status(400).send({
        success: false,
        message: 'No file uploaded',
      });
    }

    console.log(`📄 接收到文件: ${data.filename}, 类型: ${data.mimetype}`);

    const file = await data.toBuffer();
    const filename = data.filename;
    const fileType = data.mimetype;
    const fileSizeBytes = file.length;

    // File size limit (30MB, per the OSS guideline).
    const maxSize = 30 * 1024 * 1024;
    console.log(`📊 文件大小: ${(fileSizeBytes / 1024 / 1024).toFixed(2)}MB (限制: 30MB)`);

    if (fileSizeBytes > maxSize) {
      console.error(`❌ 文件太大: ${(fileSizeBytes / 1024 / 1024).toFixed(2)}MB`);
      return reply.status(400).send({
        success: false,
        message: 'File size exceeds 30MB limit',
      });
    }

    // Allowed MIME types.
    const allowedTypes = [
      'application/pdf',
      'application/msword',
      'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
      'text/plain',
      'text/markdown',
    ];

    console.log(`🔍 检查文件类型: ${fileType}`);
    if (!allowedTypes.includes(fileType)) {
      console.error(`❌ 不支持的文件类型: ${fileType}`);
      return reply.status(400).send({
        success: false,
        message: 'File type not supported. Allowed: PDF, DOC, DOCX, TXT, MD',
      });
    }

    // Generate the storage key (includes kbId).
    const storageKey = generatePkbStorageKey(tenantId, userId, kbId, filename);
    console.log(`📦 OSS 存储路径: ${storageKey}`);

    // Upload to the storage service.
    console.log(`☁️ 上传文件到存储服务...`);
    let fileUrl = '';
    try {
      fileUrl = await storage.upload(storageKey, file);
      console.log(`✅ 文件已上传到存储服务`);
    } catch (storageError) {
      console.error(`❌ 存储服务上传失败:`, storageError);
      return reply.status(500).send({
        success: false,
        message: 'Failed to upload file to storage',
      });
    }

    // Hand over to the document service (passing the storage key as well).
    // NOTE(review): if this call rejects, the object written to storage above
    // is not removed by this handler — confirm whether cleanup happens elsewhere.
    console.log(`⚙️ 调用文档服务处理文件...`);
    const document = await documentService.uploadDocument(
      userId,
      kbId,
      file,
      filename,
      fileType,
      fileSizeBytes,
      fileUrl,
      storageKey
    );

    console.log(`✅ 文档上传成功: ${document.id}`);
    return reply.status(201).send({
      success: true,
      data: document,
    });
  } catch (error: unknown) {
    // Anything can be thrown; only trust `message` when it really is a string
    // so the status mapping below cannot itself throw.
    const rawMessage =
      typeof error === 'object' && error !== null
        ? (error as { message?: unknown }).message
        : undefined;
    const message = typeof rawMessage === 'string' ? rawMessage : '';

    console.error('❌ 文档上传失败:', message);
    console.error('错误详情:', error);

    if (message.includes('not found') || message.includes('access denied')) {
      return reply.status(404).send({
        success: false,
        message,
      });
    }

    if (message.includes('limit exceeded')) {
      return reply.status(400).send({
        success: false,
        message,
      });
    }

    return reply.status(500).send({
      success: false,
      message: message || 'Failed to upload document',
    });
  }
}
|
||
|
||
/**
 * List the documents of a knowledge base for the current user.
 *
 * Responses:
 * - 200 `{ success: true, data }` with the value returned by
 *   `documentService.getDocuments`.
 * - 404 when the error message contains `not found`.
 * - 500 on any other error.
 *
 * @param request - Fastify request; `params.kbId` is the knowledge base ID.
 * @param reply - Fastify reply used to send the response.
 */
export async function getDocuments(
  request: FastifyRequest<{
    Params: {
      kbId: string;
    };
  }>,
  reply: FastifyReply
) {
  try {
    const { kbId } = request.params;

    const userId = getUserId(request);
    const documents = await documentService.getDocuments(userId, kbId);

    return reply.send({
      success: true,
      data: documents,
    });
  } catch (error: unknown) {
    console.error('Failed to get documents:', error);

    // Anything can be thrown; only trust `message` when it really is a string
    // so the status mapping below cannot itself throw.
    const rawMessage =
      typeof error === 'object' && error !== null
        ? (error as { message?: unknown }).message
        : undefined;
    const message = typeof rawMessage === 'string' ? rawMessage : '';

    if (message.includes('not found')) {
      return reply.status(404).send({
        success: false,
        message,
      });
    }

    return reply.status(500).send({
      success: false,
      message: message || 'Failed to get documents',
    });
  }
}
|
||
|
||
/**
 * Fetch a single document's details for the current user.
 *
 * Responses:
 * - 200 `{ success: true, data }` with the value returned by
 *   `documentService.getDocumentById`.
 * - 404 when the error message contains `not found`.
 * - 500 on any other error.
 *
 * @param request - Fastify request; `params.id` is the document ID.
 * @param reply - Fastify reply used to send the response.
 */
export async function getDocumentById(
  request: FastifyRequest<{
    Params: {
      id: string;
    };
  }>,
  reply: FastifyReply
) {
  try {
    const { id } = request.params;

    const userId = getUserId(request);
    const document = await documentService.getDocumentById(userId, id);

    return reply.send({
      success: true,
      data: document,
    });
  } catch (error: unknown) {
    console.error('Failed to get document:', error);

    // Anything can be thrown; only trust `message` when it really is a string
    // so the status mapping below cannot itself throw.
    const rawMessage =
      typeof error === 'object' && error !== null
        ? (error as { message?: unknown }).message
        : undefined;
    const message = typeof rawMessage === 'string' ? rawMessage : '';

    if (message.includes('not found')) {
      return reply.status(404).send({
        success: false,
        message,
      });
    }

    return reply.status(500).send({
      success: false,
      message: message || 'Failed to get document',
    });
  }
}
|
||
|
||
/**
 * Delete a document on behalf of the current user.
 *
 * Responses:
 * - 200 `{ success: true, message: 'Document deleted successfully' }` once
 *   `documentService.deleteDocument` resolves.
 * - 404 when the error message contains `not found`.
 * - 500 on any other error.
 *
 * @param request - Fastify request; `params.id` is the document ID.
 * @param reply - Fastify reply used to send the response.
 */
export async function deleteDocument(
  request: FastifyRequest<{
    Params: {
      id: string;
    };
  }>,
  reply: FastifyReply
) {
  try {
    const { id } = request.params;

    const userId = getUserId(request);
    await documentService.deleteDocument(userId, id);

    return reply.send({
      success: true,
      message: 'Document deleted successfully',
    });
  } catch (error: unknown) {
    console.error('Failed to delete document:', error);

    // Anything can be thrown; only trust `message` when it really is a string
    // so the status mapping below cannot itself throw.
    const rawMessage =
      typeof error === 'object' && error !== null
        ? (error as { message?: unknown }).message
        : undefined;
    const message = typeof rawMessage === 'string' ? rawMessage : '';

    if (message.includes('not found')) {
      return reply.status(404).send({
        success: false,
        message,
      });
    }

    return reply.status(500).send({
      success: false,
      message: message || 'Failed to delete document',
    });
  }
}
|
||
|
||
/**
 * Trigger reprocessing of a document on behalf of the current user.
 *
 * Responses:
 * - 200 `{ success: true, message: 'Document reprocessing started' }` once
 *   `documentService.reprocessDocument` resolves.
 * - 404 when the error message contains `not found`.
 * - 500 on any other error.
 *
 * @param request - Fastify request; `params.id` is the document ID.
 * @param reply - Fastify reply used to send the response.
 */
export async function reprocessDocument(
  request: FastifyRequest<{
    Params: {
      id: string;
    };
  }>,
  reply: FastifyReply
) {
  try {
    const { id } = request.params;

    const userId = getUserId(request);
    await documentService.reprocessDocument(userId, id);

    return reply.send({
      success: true,
      message: 'Document reprocessing started',
    });
  } catch (error: unknown) {
    console.error('Failed to reprocess document:', error);

    // Anything can be thrown; only trust `message` when it really is a string
    // so the status mapping below cannot itself throw.
    const rawMessage =
      typeof error === 'object' && error !== null
        ? (error as { message?: unknown }).message
        : undefined;
    const message = typeof rawMessage === 'string' ? rawMessage : '';

    if (message.includes('not found')) {
      return reply.status(404).send({
        success: false,
        message,
      });
    }

    return reply.status(500).send({
      success: false,
      message: message || 'Failed to reprocess document',
    });
  }
}
|
||
|
||
/**
 * Phase 2: return a document's full text (used by the read-one-document-at-a-time mode).
 *
 * Loads the document through `documentService.getDocumentById` and reshapes it
 * into a payload with the extracted text, extraction details and a small
 * `metadata` object (upload time, processing time, status).
 *
 * Responses:
 * - 200 `{ success: true, data }` with the reshaped document.
 * - 404 when the error message contains `not found`.
 * - 500 on any other error.
 *
 * @param request - Fastify request; `params.id` is the document ID.
 * @param reply - Fastify reply used to send the response.
 */
export async function getDocumentFullText(
  request: FastifyRequest<{
    Params: {
      id: string;
    };
  }>,
  reply: FastifyReply
) {
  try {
    const { id } = request.params;

    const userId = getUserId(request);
    const document = await documentService.getDocumentById(userId, id);

    // Return the complete document information.
    return reply.send({
      success: true,
      data: {
        documentId: document.id,
        filename: document.filename,
        fileType: document.fileType,
        fileSizeBytes: document.fileSizeBytes,
        // String fields keep `||`: an empty value is still reported as null.
        extractedText: (document as any).extractedText || null,
        // Counts use `??` so a legitimate 0 is reported as 0 rather than null.
        charCount: (document as any).charCount ?? null,
        tokensCount: document.tokensCount ?? null,
        extractionMethod: (document as any).extractionMethod || null,
        extractionQuality: (document as any).extractionQuality || null,
        language: (document as any).language || null,
        metadata: {
          uploadedAt: document.uploadedAt,
          processedAt: document.processedAt,
          status: document.status,
        },
      },
    });
  } catch (error: unknown) {
    console.error('Failed to get document full text:', error);

    // Anything can be thrown; only trust `message` when it really is a string
    // so the status mapping below cannot itself throw.
    const rawMessage =
      typeof error === 'object' && error !== null
        ? (error as { message?: unknown }).message
        : undefined;
    const message = typeof rawMessage === 'string' ? rawMessage : '';

    if (message.includes('not found')) {
      return reply.status(404).send({
        success: false,
        message,
      });
    }

    return reply.status(500).send({
      success: false,
      message: message || 'Failed to get document full text',
    });
  }
}
|
||
|
||
/**
 * Get a URL for a document's stored file (for front-end preview or download).
 *
 * When the storage backend is an `OSSAdapter`, a signed URL with an expiry is
 * generated; with `download=true` the original filename is passed along so the
 * download restores it. For any other storage backend the plain `getUrl`
 * result is returned.
 *
 * Query parameters:
 * - `expires`: lifetime in seconds; defaults to 3600 when omitted or empty.
 *   Must be a positive integer written in decimal digits, otherwise the request
 *   is rejected with 400. Values above 7 days are capped to 7 days.
 * - `download`: `'true'` to request an attachment-style URL; defaults to false.
 *
 * Responses:
 * - 200 `{ success: true, data: { documentId, filename, fileType, url, expiresIn } }`.
 * - 400 when `expires` is not a positive integer.
 * - 404 when the document has no `storageKey`, or the error message contains
 *   `not found` or `access denied`.
 * - 500 on any other error.
 *
 * @param request - Fastify request; `params.id` is the document ID.
 * @param reply - Fastify reply used to send the response.
 */
export async function getDocumentSignedUrl(
  request: FastifyRequest<{
    Params: {
      id: string;
    };
    Querystring: {
      /** Lifetime in seconds; defaults to 3600. */
      expires?: string;
      /** Whether to download as an attachment (adds Content-Disposition); defaults to false. */
      download?: string;
    };
  }>,
  reply: FastifyReply
) {
  const defaultExpirySeconds = 3600;
  // NOTE(review): upper bound chosen to match the 7-day ceiling common to
  // pre-signed URL schemes — tighten if product policy requires shorter links.
  const maxExpirySeconds = 7 * 24 * 60 * 60;

  try {
    const { id } = request.params;
    const download = request.query.download === 'true';

    // `expires` is caller-controlled: validate it before any lookup so values
    // such as "abc", "-1" or "0" never reach the storage adapter.
    const rawExpires: unknown = request.query.expires;
    let expires = defaultExpirySeconds;
    if (rawExpires !== undefined && rawExpires !== '') {
      const requested =
        typeof rawExpires === 'string' && /^\d+$/.test(rawExpires)
          ? Number(rawExpires)
          : Number.NaN;

      if (!Number.isSafeInteger(requested) || requested <= 0) {
        return reply.status(400).send({
          success: false,
          message: 'Invalid "expires" value: expected a positive integer number of seconds',
        });
      }

      expires = Math.min(requested, maxExpirySeconds);
    }

    const userId = getUserId(request);

    // Load the document record.
    const document = await documentService.getDocumentById(userId, id);

    // A record without a storage key has no file to point at.
    if (!document.storageKey) {
      logger.warn('[PKB] 文档没有存储路径,可能是旧数据', { documentId: id });
      return reply.status(404).send({
        success: false,
        message: '文档文件不可用,请重新上传',
      });
    }

    // Build the URL according to the storage adapter in use.
    let signedUrl: string;

    if (storage instanceof OSSAdapter) {
      // OSS: signed URL, carrying the original filename when downloading.
      signedUrl = download
        ? storage.getSignedUrl(document.storageKey, expires, document.filename)
        : storage.getSignedUrl(document.storageKey, expires);
    } else {
      // Local storage: use getUrl.
      signedUrl = storage.getUrl(document.storageKey);
    }

    logger.info('[PKB] 生成签名URL', {
      documentId: id,
      filename: document.filename,
      expires,
      download
    });

    return reply.send({
      success: true,
      data: {
        documentId: document.id,
        filename: document.filename,
        fileType: document.fileType,
        url: signedUrl,
        expiresIn: expires,
      },
    });
  } catch (error: unknown) {
    // Anything can be thrown; only trust `message` when it really is a string
    // so the status mapping below cannot itself throw.
    const rawMessage =
      typeof error === 'object' && error !== null
        ? (error as { message?: unknown }).message
        : undefined;
    const message = typeof rawMessage === 'string' ? rawMessage : '';

    logger.error('[PKB] 获取签名URL失败', { error: message });

    if (message.includes('not found') || message.includes('access denied')) {
      return reply.status(404).send({
        success: false,
        message,
      });
    }

    return reply.status(500).send({
      success: false,
      message: message || 'Failed to get signed URL',
    });
  }
}
|
||
|
||
|