feat(backend): implement knowledge base management backend API
This commit is contained in:
327
backend/src/services/documentService.ts
Normal file
327
backend/src/services/documentService.ts
Normal file
@@ -0,0 +1,327 @@
|
||||
import { prisma } from '../config/database.js';
|
||||
import { difyClient } from '../clients/DifyClient.js';
|
||||
|
||||
/**
|
||||
* 文档服务
|
||||
*/
|
||||
|
||||
/**
 * Uploads a document into one of the caller's knowledge bases.
 *
 * Flow: verify that `kbId` belongs to `userId`, enforce the per-knowledge-base
 * document cap, create a local record in the `uploading` state, push the file
 * to Dify, store the returned Dify document id and indexing status, start
 * background status polling, and refresh the knowledge base statistics.
 *
 * @param userId - Owner of the knowledge base; used for the access check.
 * @param kbId - Id of the target knowledge base.
 * @param file - Raw file content forwarded to Dify.
 * @param filename - File name stored locally and sent to Dify.
 * @param fileType - File type stored on the local record.
 * @param fileSizeBytes - File size stored on the local record.
 * @param fileUrl - File location stored on the local record.
 * @returns The updated document record with `fileSizeBytes` converted to a number.
 * @throws Error when the knowledge base is not found for this user, when the
 *   document limit is reached, or when the Dify upload or a database write fails.
 */
export async function uploadDocument(
  userId: string,
  kbId: string,
  file: Buffer,
  filename: string,
  fileType: string,
  fileSizeBytes: number,
  fileUrl: string
) {
  // Each knowledge base may hold at most this many documents.
  const maxDocumentsPerKnowledgeBase = 50;

  // 1. Verify that the knowledge base exists and belongs to the user.
  const knowledgeBase = await prisma.knowledgeBase.findFirst({
    where: {
      id: kbId,
      userId,
    },
  });

  if (!knowledgeBase) {
    throw new Error('Knowledge base not found or access denied');
  }

  // 2. Enforce the document limit.
  // NOTE(review): count-then-create is not atomic, so concurrent uploads can
  // exceed the limit; records left in the 'error' state also count towards it.
  const documentCount = await prisma.document.count({
    where: { kbId },
  });

  if (documentCount >= maxDocumentsPerKnowledgeBase) {
    throw new Error(
      `Document limit exceeded. Maximum ${maxDocumentsPerKnowledgeBase} documents per knowledge base`
    );
  }

  // 3. Create the local record first (status: uploading).
  const document = await prisma.document.create({
    data: {
      kbId,
      userId,
      filename,
      fileType,
      fileSizeBytes,
      fileUrl,
      difyDocumentId: '', // Filled in once Dify has accepted the upload.
      status: 'uploading',
      progress: 0,
    },
  });

  // 4-5. Upload to Dify and persist the Dify id/status. Only these two steps
  // are allowed to flag the record as failed; later bookkeeping must not turn
  // a successfully uploaded document into an 'error' record.
  let updatedDocument;
  try {
    const difyResult = await difyClient.uploadDocumentDirectly(
      knowledgeBase.difyDatasetId,
      file,
      filename
    );

    updatedDocument = await prisma.document.update({
      where: { id: document.id },
      data: {
        difyDocumentId: difyResult.document.id,
        status: difyResult.document.indexing_status,
        progress: 50,
      },
    });
  } catch (error) {
    // Flag the record as failed on a best-effort basis: a failure here must
    // not replace the original error that the caller needs to see.
    try {
      await prisma.document.update({
        where: { id: document.id },
        data: {
          status: 'error',
          errorMessage: error instanceof Error ? error.message : 'Upload failed',
        },
      });
    } catch (markError) {
      console.error('Failed to mark document as errored:', markError);
    }

    throw error;
  }

  // 6. Start background polling (fire-and-forget) until Dify finishes indexing.
  pollDocumentStatus(userId, kbId, document.id, updatedDocument.difyDocumentId).catch(error => {
    console.error('Failed to poll document status:', error);
  });

  // 7. Refresh the knowledge base statistics.
  await updateKnowledgeBaseStats(kbId);

  // 8. Convert the BigInt size to a plain number for the caller.
  return {
    ...updatedDocument,
    fileSizeBytes: Number(updatedDocument.fileSizeBytes),
  };
}
|
||||
|
||||
/**
 * Polls Dify for a document's indexing status and mirrors it into the local
 * document record.
 *
 * Waits two seconds before every attempt. Polling stops when Dify reports
 * `completed` (after refreshing the knowledge base statistics) or `error`, or
 * when `maxAttempts` is exhausted. A failed attempt is logged and the loop
 * continues with the next one.
 *
 * NOTE(review): when the attempts run out the record keeps the last status
 * that was written; nothing marks it as timed out.
 *
 * @param userId - Owner of the knowledge base; used to look it up.
 * @param kbId - Knowledge base the document belongs to.
 * @param documentId - Local document record id.
 * @param difyDocumentId - Document id on the Dify side.
 * @param maxAttempts - Maximum number of polling attempts (default 30).
 */
async function pollDocumentStatus(
  userId: string,
  kbId: string,
  documentId: string,
  difyDocumentId: string,
  maxAttempts: number = 30
) {
  const knowledgeBase = await prisma.knowledgeBase.findFirst({
    where: { id: kbId, userId },
  });

  // Nothing to poll if the knowledge base is missing for this user.
  if (!knowledgeBase) {
    return;
  }

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    await new Promise(resolve => setTimeout(resolve, 2000)); // Wait 2 seconds.

    try {
      // Ask Dify for the current document state.
      const difyDocument = await difyClient.getDocument(
        knowledgeBase.difyDatasetId,
        difyDocumentId
      );
      const isCompleted = difyDocument.indexing_status === 'completed';

      // Mirror the state locally. While indexing is still running the progress
      // is only an estimate derived from the attempt number; it is capped at
      // 99 so it never reaches or exceeds 100 before Dify reports completion.
      await prisma.document.update({
        where: { id: documentId },
        data: {
          status: difyDocument.indexing_status,
          progress: isCompleted ? 100 : Math.min(99, 50 + attempt * 2),
          // NOTE(review): segmentsCount is filled from Dify's word_count — confirm
          // this is the intended source field.
          segmentsCount: isCompleted ? difyDocument.word_count : null,
          tokensCount: isCompleted ? difyDocument.tokens : null,
          processedAt: isCompleted ? new Date() : null,
          errorMessage: difyDocument.error || null,
        },
      });

      // Terminal states end the polling loop.
      if (isCompleted) {
        await updateKnowledgeBaseStats(kbId);
        break;
      }

      if (difyDocument.indexing_status === 'error') {
        break;
      }
    } catch (error) {
      console.error(`Polling attempt ${attempt + 1} failed:`, error);
    }
  }
}
|
||||
|
||||
/**
 * Lists the documents of a knowledge base owned by the given user.
 *
 * @param userId - Owner of the knowledge base; used for the access check.
 * @param kbId - Knowledge base whose documents are listed.
 * @returns The documents ordered by `uploadedAt` descending, each with
 *   `fileSizeBytes` converted to a number.
 * @throws Error when the knowledge base is not found for this user.
 */
export async function getDocuments(userId: string, kbId: string) {
  // Access check: the knowledge base must belong to the user.
  const ownedKnowledgeBase = await prisma.knowledgeBase.findFirst({
    where: { id: kbId, userId },
  });
  if (!ownedKnowledgeBase) {
    throw new Error('Knowledge base not found or access denied');
  }

  // Newest uploads first.
  const rows = await prisma.document.findMany({
    where: { kbId },
    orderBy: { uploadedAt: 'desc' },
  });

  // Convert the BigInt size of every row to a plain number.
  const converted = [];
  for (const row of rows) {
    converted.push({
      ...row,
      fileSizeBytes: Number(row.fileSizeBytes),
    });
  }
  return converted;
}
|
||||
|
||||
/**
 * Fetches a single document, together with its knowledge base, for the given user.
 *
 * @param userId - Owner of the document; restricts the lookup to the user's own documents.
 * @param documentId - Id of the document to fetch.
 * @returns The document with `fileSizeBytes` and the nested
 *   `knowledgeBase.totalSizeBytes` converted to numbers.
 * @throws Error when no matching document exists for this user.
 */
export async function getDocumentById(userId: string, documentId: string) {
  const found = await prisma.document.findFirst({
    where: { id: documentId, userId },
    include: { knowledgeBase: true },
  });

  if (!found) {
    throw new Error('Document not found or access denied');
  }

  // Convert the BigInt fields to plain numbers.
  const knowledgeBase = {
    ...found.knowledgeBase,
    totalSizeBytes: Number(found.knowledgeBase.totalSizeBytes),
  };

  return {
    ...found,
    fileSizeBytes: Number(found.fileSizeBytes),
    knowledgeBase,
  };
}
|
||||
|
||||
/**
 * Deletes a document owned by the given user, both in Dify and locally.
 *
 * The Dify deletion is best-effort: a failure is logged and the local record
 * is removed anyway. The knowledge base statistics are refreshed afterwards.
 *
 * @param userId - Owner of the document; used for the access check.
 * @param documentId - Id of the document to delete.
 * @throws Error when no matching document exists for this user, or when the
 *   local delete or the statistics refresh fails.
 */
export async function deleteDocument(userId: string, documentId: string) {
  // Look up the document (with its knowledge base) for this user.
  const target = await prisma.document.findFirst({
    where: { id: documentId, userId },
    include: { knowledgeBase: true },
  });

  if (!target) {
    throw new Error('Document not found or access denied');
  }

  const { difyDocumentId, kbId, knowledgeBase } = target;

  // Remove the Dify copy, if there is one.
  if (difyDocumentId) {
    try {
      await difyClient.deleteDocument(knowledgeBase.difyDatasetId, difyDocumentId);
    } catch (error) {
      // Keep going: the local record is deleted regardless.
      console.error('Failed to delete Dify document:', error);
    }
  }

  // Remove the local record, then refresh the knowledge base statistics.
  await prisma.document.delete({ where: { id: documentId } });
  await updateKnowledgeBaseStats(kbId);
}
|
||||
|
||||
/**
 * Triggers re-indexing of a document in Dify and restarts status polling.
 *
 * Documents without a Dify document id are left untouched; this is logged as
 * a warning because there is nothing to re-index.
 *
 * @param userId - Owner of the document; used for the access check.
 * @param documentId - Id of the document to reprocess.
 * @throws Error `Document not found or access denied` when no matching
 *   document exists for this user, or `Failed to reprocess document` when the
 *   Dify call or the local status update fails.
 */
export async function reprocessDocument(userId: string, documentId: string) {
  // 1. Look up the document (with its knowledge base) for this user.
  const document = await prisma.document.findFirst({
    where: {
      id: documentId,
      userId,
    },
    include: {
      knowledgeBase: true,
    },
  });

  if (!document) {
    throw new Error('Document not found or access denied');
  }

  if (!document.difyDocumentId) {
    // Same no-op as before, but no longer silent.
    console.warn(`Document ${documentId} has no Dify document id; skipping reprocess`);
    return;
  }

  try {
    // 2. Ask Dify to re-index the document.
    await difyClient.updateDocument(
      document.knowledgeBase.difyDatasetId,
      document.difyDocumentId
    );

    // 3. Reset the local record to the 'parsing' state.
    await prisma.document.update({
      where: { id: documentId },
      data: {
        status: 'parsing',
        progress: 0,
        errorMessage: null,
      },
    });

    // 4. Restart background polling (fire-and-forget).
    pollDocumentStatus(
      userId,
      document.kbId,
      documentId,
      document.difyDocumentId
    ).catch(error => {
      console.error('Failed to poll document status:', error);
    });
  } catch (error) {
    // Log the root cause before replacing it with the generic message callers
    // already expect; otherwise the underlying failure is lost.
    console.error('Failed to reprocess document:', error);
    throw new Error('Failed to reprocess document');
  }
}
|
||||
|
||||
/**
 * Recomputes a knowledge base's `fileCount` and `totalSizeBytes` from its
 * documents and stores them on the knowledge base record.
 *
 * The count and the size sum are computed by the database in one aggregate
 * query, so no document rows are loaded and the BigInt sum is not routed
 * through a JavaScript number.
 *
 * @param kbId - Id of the knowledge base to refresh.
 */
async function updateKnowledgeBaseStats(kbId: string) {
  const totals = await prisma.document.aggregate({
    where: { kbId },
    _count: { _all: true },
    _sum: { fileSizeBytes: true },
  });

  await prisma.knowledgeBase.update({
    where: { id: kbId },
    data: {
      fileCount: totals._count._all,
      // The sum is null when the knowledge base has no documents.
      totalSizeBytes: totals._sum.fileSizeBytes ?? BigInt(0),
    },
  });
}
|
||||
|
||||
261
backend/src/services/knowledgeBaseService.ts
Normal file
261
backend/src/services/knowledgeBaseService.ts
Normal file
@@ -0,0 +1,261 @@
|
||||
import { prisma } from '../config/database.js';
|
||||
import { difyClient } from '../clients/DifyClient.js';
|
||||
|
||||
/**
|
||||
* 知识库服务
|
||||
*/
|
||||
|
||||
/**
 * Creates a knowledge base for a user, backed by a new Dify dataset.
 *
 * Flow: check the user's knowledge base quota, create the Dify dataset, then
 * create the local record and increment the user's `kbUsed` counter in one
 * transaction. If the database step fails, the freshly created Dify dataset is
 * deleted again (best-effort) so it is not left orphaned.
 *
 * @param userId - Owner of the new knowledge base.
 * @param name - Display name of the knowledge base.
 * @param description - Optional description; a default is sent to Dify when omitted.
 * @returns The created knowledge base with `totalSizeBytes` converted to a number.
 * @throws Error when the user does not exist, the quota is exhausted, or the
 *   Dify call or the database write fails.
 */
export async function createKnowledgeBase(
  userId: string,
  name: string,
  description?: string
) {
  // 1. Check the user's knowledge base quota.
  // NOTE(review): the check and the later increment are separate steps, so
  // concurrent requests can still exceed the quota.
  const user = await prisma.user.findUnique({
    where: { id: userId },
    select: { kbQuota: true, kbUsed: true },
  });

  if (!user) {
    throw new Error('User not found');
  }

  if (user.kbUsed >= user.kbQuota) {
    throw new Error(`Knowledge base quota exceeded. Maximum: ${user.kbQuota}`);
  }

  // 2. Create the dataset in Dify. The name combines user id, name and a
  // timestamp.
  const difyDataset = await difyClient.createDataset({
    name: `${userId}_${name}_${Date.now()}`,
    description: description || `Knowledge base for user ${userId}`,
    indexing_technique: 'high_quality',
  });

  try {
    // 3-4. Create the local record and bump the usage counter atomically, so a
    // failure cannot leave a knowledge base that is not counted.
    const [knowledgeBase] = await prisma.$transaction([
      prisma.knowledgeBase.create({
        data: {
          userId,
          name,
          description,
          difyDatasetId: difyDataset.id,
        },
      }),
      prisma.user.update({
        where: { id: userId },
        data: {
          kbUsed: { increment: 1 },
        },
      }),
    ]);

    // 5. Convert the BigInt size to a plain number for the caller.
    return {
      ...knowledgeBase,
      totalSizeBytes: Number(knowledgeBase.totalSizeBytes),
    };
  } catch (error) {
    // Roll back the Dify side on a best-effort basis; the original error is
    // what the caller needs to see.
    try {
      await difyClient.deleteDataset(difyDataset.id);
    } catch (cleanupError) {
      console.error('Failed to delete orphaned Dify dataset:', cleanupError);
    }

    throw error;
  }
}
|
||||
|
||||
/**
 * Lists all knowledge bases that belong to a user.
 *
 * @param userId - Owner whose knowledge bases are listed.
 * @returns The knowledge bases ordered by `createdAt` descending, each with a
 *   `_count.documents` value and `totalSizeBytes` converted to a number.
 */
export async function getKnowledgeBases(userId: string) {
  const rows = await prisma.knowledgeBase.findMany({
    where: { userId },
    orderBy: { createdAt: 'desc' },
    include: {
      _count: { select: { documents: true } },
    },
  });

  // Convert the BigInt size of every row to a plain number.
  const converted = [];
  for (const row of rows) {
    converted.push({
      ...row,
      totalSizeBytes: Number(row.totalSizeBytes),
    });
  }
  return converted;
}
|
||||
|
||||
/**
 * Fetches one knowledge base of the given user, including its documents.
 *
 * @param userId - Owner of the knowledge base; restricts the lookup to the user's own records.
 * @param kbId - Id of the knowledge base to fetch.
 * @returns The knowledge base with its documents (ordered by `uploadedAt`
 *   descending) and a `_count.documents` value; `totalSizeBytes` and every
 *   document's `fileSizeBytes` are converted to numbers.
 * @throws Error when the knowledge base is not found for this user.
 */
export async function getKnowledgeBaseById(userId: string, kbId: string) {
  const found = await prisma.knowledgeBase.findFirst({
    where: { id: kbId, userId },
    include: {
      documents: { orderBy: { uploadedAt: 'desc' } },
      _count: { select: { documents: true } },
    },
  });

  if (!found) {
    throw new Error('Knowledge base not found or access denied');
  }

  // Convert the BigInt size of every document to a plain number.
  const documents = [];
  for (const doc of found.documents) {
    documents.push({
      ...doc,
      fileSizeBytes: Number(doc.fileSizeBytes),
    });
  }

  return {
    ...found,
    totalSizeBytes: Number(found.totalSizeBytes),
    documents,
  };
}
|
||||
|
||||
/**
 * Updates the name and/or description of a knowledge base owned by the user.
 *
 * Only `name` and `description` are forwarded to the database. Types are
 * erased at runtime, so `data` may carry extra properties (for example when it
 * comes straight from a request body); those are ignored so that fields such
 * as the owner or the Dify dataset id cannot be overwritten through this call.
 *
 * @param userId - Owner of the knowledge base; used for the access check.
 * @param kbId - Id of the knowledge base to update.
 * @param data - Fields to change; omitted fields are left untouched.
 * @returns The updated knowledge base with `totalSizeBytes` converted to a number.
 * @throws Error when the knowledge base is not found for this user.
 */
export async function updateKnowledgeBase(
  userId: string,
  kbId: string,
  data: { name?: string; description?: string }
) {
  // 1. Access check: the knowledge base must belong to the user.
  const existingKb = await prisma.knowledgeBase.findFirst({
    where: {
      id: kbId,
      userId,
    },
  });

  if (!existingKb) {
    throw new Error('Knowledge base not found or access denied');
  }

  // 2. Build the update from the allowed fields only.
  const changes: { name?: string; description?: string } = {};
  if (data.name !== undefined) {
    changes.name = data.name;
  }
  if (data.description !== undefined) {
    changes.description = data.description;
  }

  const knowledgeBase = await prisma.knowledgeBase.update({
    where: { id: kbId },
    data: changes,
  });

  // 3. Convert the BigInt size to a plain number for the caller.
  return {
    ...knowledgeBase,
    totalSizeBytes: Number(knowledgeBase.totalSizeBytes),
  };
}
|
||||
|
||||
/**
 * Deletes a knowledge base owned by the user, both in Dify and locally.
 *
 * The Dify dataset deletion is best-effort: a failure is logged and the local
 * record is removed anyway. The local delete and the decrement of the user's
 * `kbUsed` counter run in one transaction so the counter cannot drift from the
 * stored records.
 *
 * @param userId - Owner of the knowledge base; used for the access check.
 * @param kbId - Id of the knowledge base to delete.
 * @throws Error when the knowledge base is not found for this user, or when
 *   the database transaction fails.
 */
export async function deleteKnowledgeBase(userId: string, kbId: string) {
  // 1. Access check: the knowledge base must belong to the user.
  const knowledgeBase = await prisma.knowledgeBase.findFirst({
    where: {
      id: kbId,
      userId,
    },
  });

  if (!knowledgeBase) {
    throw new Error('Knowledge base not found or access denied');
  }

  // 2. Delete the dataset in Dify.
  try {
    await difyClient.deleteDataset(knowledgeBase.difyDatasetId);
  } catch (error) {
    // Keep going: the local record is deleted even if Dify fails.
    console.error('Failed to delete Dify dataset:', error);
  }

  // 3-4. Delete the record and release the quota slot atomically.
  // NOTE(review): the original comment states that documents are removed by a
  // cascading delete; that relies on the schema, which is not visible here.
  await prisma.$transaction([
    prisma.knowledgeBase.delete({
      where: { id: kbId },
    }),
    prisma.user.update({
      where: { id: userId },
      data: {
        kbUsed: { decrement: 1 },
      },
    }),
  ]);
}
|
||||
|
||||
/**
 * Runs a semantic search against a knowledge base owned by the user.
 *
 * @param userId - Owner of the knowledge base; used for the access check.
 * @param kbId - Id of the knowledge base to search.
 * @param query - Search text passed to Dify.
 * @param topK - Value sent as `top_k` in the retrieval model (default 3).
 * @returns Whatever `difyClient.retrieveKnowledge` resolves to.
 * @throws Error when the knowledge base is not found for this user.
 */
export async function searchKnowledgeBase(
  userId: string,
  kbId: string,
  query: string,
  topK: number = 3
) {
  // Access check: the knowledge base must belong to the user.
  const ownedKnowledgeBase = await prisma.knowledgeBase.findFirst({
    where: { id: kbId, userId },
  });
  if (!ownedKnowledgeBase) {
    throw new Error('Knowledge base not found or access denied');
  }

  // Retrieval settings: semantic search with a score threshold of 0.3.
  const retrievalOptions = {
    retrieval_model: {
      search_method: 'semantic_search',
      top_k: topK,
      score_threshold_enabled: true,
      score_threshold: 0.3,
    },
  };

  return await difyClient.retrieveKnowledge(
    ownedKnowledgeBase.difyDatasetId,
    query,
    retrievalOptions
  );
}
|
||||
|
||||
/**
 * Computes summary statistics for a knowledge base owned by the user.
 *
 * @param userId - Owner of the knowledge base; used for the access check.
 * @param kbId - Id of the knowledge base to summarise.
 * @returns Document counts (total, completed, processing, error), the stored
 *   `totalSizeBytes` converted to a number, and the sum of the documents'
 *   `tokensCount` values.
 * @throws Error when the knowledge base is not found for this user.
 */
export async function getKnowledgeBaseStats(userId: string, kbId: string) {
  // 1. Access check; the documents are loaded together with the knowledge base.
  const knowledgeBase = await prisma.knowledgeBase.findFirst({
    where: {
      id: kbId,
      userId,
    },
    include: {
      documents: true,
    },
  });

  if (!knowledgeBase) {
    throw new Error('Knowledge base not found or access denied');
  }

  // 2. Tally the documents in a single pass.
  // NOTE(review): document status is copied from Dify's indexing_status
  // elsewhere in this codebase; confirm whether further in-progress values
  // should count as "processing" here.
  const processingStatuses = ['uploading', 'parsing', 'indexing'];
  let completedDocuments = 0;
  let processingDocuments = 0;
  let errorDocuments = 0;
  let totalTokens = 0;

  for (const doc of knowledgeBase.documents) {
    if (doc.status === 'completed') {
      completedDocuments += 1;
    } else if (doc.status === 'error') {
      errorDocuments += 1;
    } else if (processingStatuses.includes(doc.status)) {
      processingDocuments += 1;
    }
    totalTokens += doc.tokensCount || 0;
  }

  return {
    totalDocuments: knowledgeBase.documents.length,
    completedDocuments,
    processingDocuments,
    errorDocuments,
    // Converted like every other size in this service: a raw BigInt cannot be
    // serialised by JSON.stringify.
    totalSizeBytes: Number(knowledgeBase.totalSizeBytes),
    totalTokens,
  };
}
|
||||
|
||||
Reference in New Issue
Block a user