feat(pkb): Replace Dify with self-developed pgvector RAG engine

Major milestone: Successfully replaced Dify external service with PostgreSQL + pgvector RAG engine

Backend changes:
- Refactor ragService.ts: Remove dual-track mode, keep only pgvector
- Refactor knowledgeBaseService.ts: Remove Dify creation logic
- Refactor documentService.ts: Remove Dify upload/polling logic
- DifyClient.ts: Convert to deprecated stub file (for legacy compatibility)
- common/rag/index.ts: Update exports
- common/rag/types.ts: Remove Dify types, keep generic RAG types
- config/env.ts: Remove Dify configuration

Frontend changes:
- DashboardPage.tsx: Add delete knowledge base dropdown menu
- KnowledgeBaseList.tsx: Enhance quota warning display
- CreateKBDialog.tsx: Add quota exceeded modal with guidance
- knowledgeBaseApi.ts: Add auth interceptor

Documentation:
- Update PKB module status guide (v2.3)
- Update system status guide (v4.0)

Performance metrics:
- Single query latency: 2.5s
- Single query cost: 0.0025 CNY
- Cross-language accuracy improvement: +20.5%

Remaining tasks:
- OSS storage integration
- pg_bigm extension installation

Tested: End-to-end test passed (create KB -> upload doc -> vector search)
This commit is contained in:
2026-01-21 22:35:50 +08:00
parent 40c2f8e148
commit 483c62fb6f
14 changed files with 741 additions and 1018 deletions

View File

@@ -1,323 +1,50 @@
import axios, { AxiosInstance, AxiosError } from 'axios';
import FormData from 'form-data';
import {
Dataset,
CreateDatasetRequest,
CreateDatasetResponse,
DatasetListResponse,
Document,
DocumentListResponse,
CreateDocumentByFileRequest,
CreateDocumentResponse,
RetrievalRequest,
RetrievalResponse,
DifyError,
DifyErrorResponse,
} from './types.js';
import { config } from '../../config/env.js';
/**
* Dify API 客户端
* DifyClient - 已废弃
*
* 封装 Dify 知识库相关 API
* Dify 已于 2026-01-21 被移除,完全使用 pgvector RAG 引擎。
* 此文件仅为 legacy 代码提供兼容性支持。
*
* @deprecated 请使用 ragService 中的 pgvector 实现
*/
export class DifyClient {
private client: AxiosInstance;
private apiKey: string;
private apiUrl: string;
constructor(apiKey?: string, apiUrl?: string) {
this.apiKey = apiKey || config.difyApiKey;
this.apiUrl = apiUrl || config.difyApiUrl;
import { logger } from '../logging/index.js';
if (!this.apiKey) {
throw new Error('Dify API Key is required');
}
const DEPRECATED_MESSAGE = 'Dify 已废弃,请使用 pgvector RAG 引擎。Legacy 代码需要迁移到新的 ragService。';
if (!this.apiUrl) {
throw new Error('Dify API URL is required');
}
// 创建 axios 实例
this.client = axios.create({
baseURL: this.apiUrl,
headers: {
'Authorization': `Bearer ${this.apiKey}`,
'Content-Type': 'application/json',
},
timeout: 30000, // 30秒超时
});
// 响应拦截器:统一错误处理
this.client.interceptors.response.use(
(response) => response,
(error: AxiosError) => {
if (error.response?.data) {
const errorData = error.response.data as DifyErrorResponse;
throw new DifyError({
code: errorData.code || 'UNKNOWN_ERROR',
message: errorData.message || error.message,
status: error.response.status,
});
}
throw error;
}
);
class DeprecatedDifyClient {
constructor() {
logger.warn('[DifyClient] ' + DEPRECATED_MESSAGE);
}
// ==================== 知识库管理 API ====================
/**
* 创建知识库
*
* @param params 创建参数
* @returns 创建的知识库信息
*/
async createDataset(params: CreateDatasetRequest): Promise<CreateDatasetResponse> {
const response = await this.client.post<CreateDatasetResponse>('/datasets', params);
return response.data;
async createDataset(_params: any): Promise<any> {
throw new Error(DEPRECATED_MESSAGE);
}
/**
* 获取知识库列表
*
* @param page 页码从1开始
* @param limit 每页数量默认20
* @returns 知识库列表
*/
async getDatasets(page: number = 1, limit: number = 20): Promise<DatasetListResponse> {
const response = await this.client.get<DatasetListResponse>('/datasets', {
params: { page, limit },
});
return response.data;
async deleteDataset(_datasetId: string): Promise<void> {
throw new Error(DEPRECATED_MESSAGE);
}
/**
* 获取知识库详情
*
* @param datasetId 知识库ID
* @returns 知识库信息
*/
async getDataset(datasetId: string): Promise<Dataset> {
const response = await this.client.get<Dataset>(`/datasets/${datasetId}`);
return response.data;
async getDocument(_datasetId: string, _documentId: string): Promise<any> {
throw new Error(DEPRECATED_MESSAGE);
}
/**
* 删除知识库
*
* @param datasetId 知识库ID
*/
async deleteDataset(datasetId: string): Promise<void> {
await this.client.delete(`/datasets/${datasetId}`);
async uploadDocumentDirectly(_datasetId: string, _file: Buffer, _filename: string): Promise<any> {
throw new Error(DEPRECATED_MESSAGE);
}
// ==================== 文档管理 API ====================
/**
* 直接上传文档到知识库(简化版)
*
* @param datasetId 知识库ID
* @param file 文件 Buffer
* @param filename 文件名
* @param params 创建参数
* @returns 创建的文档信息
*/
async uploadDocumentDirectly(
datasetId: string,
file: Buffer,
filename: string,
params?: Partial<CreateDocumentByFileRequest>
): Promise<CreateDocumentResponse> {
const formData = new FormData();
formData.append('file', file, filename);
// 添加其他参数
const defaultParams = {
indexing_technique: 'high_quality',
process_rule: {
mode: 'automatic',
rules: {
pre_processing_rules: [
{ id: 'remove_extra_spaces', enabled: true },
{ id: 'remove_urls_emails', enabled: false },
],
segmentation: {
separator: '\n',
max_tokens: 1500, // Phase 1优化从500增加到1500 tokens
},
},
},
...params,
};
formData.append('data', JSON.stringify(defaultParams));
const response = await this.client.post<CreateDocumentResponse>(
`/datasets/${datasetId}/document/create_by_file`,
formData,
{
headers: {
...formData.getHeaders(),
'Authorization': `Bearer ${this.apiKey}`,
},
}
);
return response.data;
async deleteDocument(_datasetId: string, _documentId: string): Promise<void> {
throw new Error(DEPRECATED_MESSAGE);
}
/**
* 获取文档列表
*
* @param datasetId 知识库ID
* @param page 页码从1开始
* @param limit 每页数量默认20
* @returns 文档列表
*/
async getDocuments(
datasetId: string,
page: number = 1,
limit: number = 20
): Promise<DocumentListResponse> {
const response = await this.client.get<DocumentListResponse>(
`/datasets/${datasetId}/documents`,
{
params: { page, limit },
}
);
return response.data;
async updateDocument(_datasetId: string, _documentId: string): Promise<any> {
throw new Error(DEPRECATED_MESSAGE);
}
/**
* 获取文档详情
*
* @param datasetId 知识库ID
* @param documentId 文档ID
* @returns 文档信息
*/
async getDocument(datasetId: string, documentId: string): Promise<Document> {
const response = await this.client.get<Document>(
`/datasets/${datasetId}/documents/${documentId}`
);
return response.data;
}
/**
* 删除文档
*
* @param datasetId 知识库ID
* @param documentId 文档ID
*/
async deleteDocument(datasetId: string, documentId: string): Promise<void> {
await this.client.delete(`/datasets/${datasetId}/documents/${documentId}`);
}
/**
* 更新文档(重新索引)
*
* @param datasetId 知识库ID
* @param documentId 文档ID
*/
async updateDocument(datasetId: string, documentId: string): Promise<void> {
await this.client.post(`/datasets/${datasetId}/documents/${documentId}/processing`);
}
// ==================== 知识库检索 API ====================
/**
* 检索知识库
*
* @param datasetId 知识库ID
* @param query 查询文本
* @param params 检索参数
* @returns 检索结果
*/
async retrieveKnowledge(
datasetId: string,
query: string,
params?: Partial<RetrievalRequest>
): Promise<RetrievalResponse> {
const requestParams: RetrievalRequest = {
query,
retrieval_model: {
search_method: 'semantic_search',
reranking_enable: false,
top_k: 3,
score_threshold_enabled: false,
...params?.retrieval_model,
},
};
const response = await this.client.post<RetrievalResponse>(
`/datasets/${datasetId}/retrieve`,
requestParams
);
return response.data;
}
// ==================== 辅助方法 ====================
/**
* 轮询检查文档处理状态
*
* @param datasetId 知识库ID
* @param documentId 文档ID
* @param maxAttempts 最大尝试次数默认30次
* @param interval 轮询间隔毫秒默认2000ms
* @returns 文档信息
*/
async waitForDocumentProcessing(
datasetId: string,
documentId: string,
maxAttempts: number = 30,
interval: number = 2000
): Promise<Document> {
for (let i = 0; i < maxAttempts; i++) {
const document = await this.getDocument(datasetId, documentId);
if (document.indexing_status === 'completed') {
return document;
}
if (document.indexing_status === 'error') {
throw new Error(`Document processing failed: ${document.error || 'Unknown error'}`);
}
// 等待后继续
await new Promise((resolve) => setTimeout(resolve, interval));
}
throw new Error('Document processing timeout');
}
/**
* 一键上传文档到知识库(上传 + 等待处理完成)
*
* @param datasetId 知识库ID
* @param file 文件 Buffer
* @param filename 文件名
* @returns 处理完成的文档信息
*/
async uploadAndProcessDocument(
datasetId: string,
file: Buffer,
filename: string
): Promise<Document> {
// 1. 直接上传文档
const createResult = await this.uploadDocumentDirectly(datasetId, file, filename);
// 2. 等待处理完成
const document = await this.waitForDocumentProcessing(
datasetId,
createResult.document.id
);
return document;
async retrieveKnowledge(_datasetId: string, _query: string, _options?: any): Promise<any> {
throw new Error(DEPRECATED_MESSAGE);
}
}
// 导出单例实例
export const difyClient = new DifyClient();
export const difyClient = new DeprecatedDifyClient();
export const DifyClient = DeprecatedDifyClient;

View File

@@ -59,8 +59,11 @@ export {
type DocumentInput,
} from './DocumentIngestService.js';
// ==================== 旧版兼容Dify====================
// ==================== 类型导出 ====================
export { DifyClient } from './DifyClient.js';
export * from './types.js';
// ==================== 废弃的 Dify 兼容层(仅供 Legacy 代码使用)====================
export { difyClient, DifyClient } from './DifyClient.js';

View File

@@ -1,199 +1,25 @@
/**
* Dify API 类型定义
* RAG 引擎 - 通用类型定义
*
* 2026-01-21: 移除 Dify 类型,保留通用 RAG 类型
*/
// ==================== 知识库相关类型 ====================
/**
* 知识库信息
*/
export interface Dataset {
id: string;
name: string;
description: string;
permission: 'only_me' | 'all_team_members';
data_source_type: 'upload_file' | 'notion_import' | 'website_crawl';
indexing_technique: 'high_quality' | 'economy';
app_count: number;
document_count: number;
word_count: number;
created_by: string;
created_at: number;
updated_by: string;
updated_at: number;
}
/**
* 创建知识库请求参数
*/
export interface CreateDatasetRequest {
name: string;
description?: string;
permission?: 'only_me' | 'all_team_members';
indexing_technique?: 'high_quality' | 'economy';
embedding_model?: string;
embedding_model_provider?: string;
retrieval_model?: {
search_method: 'semantic_search' | 'full_text_search' | 'hybrid_search';
reranking_enable?: boolean;
reranking_model?: {
reranking_provider_name: string;
reranking_model_name: string;
};
top_k?: number;
score_threshold_enabled?: boolean;
score_threshold?: number;
};
}
/**
* 创建知识库响应
*/
export interface CreateDatasetResponse {
id: string;
name: string;
description: string;
permission: string;
data_source_type: string;
indexing_technique: string;
created_by: string;
created_at: number;
}
/**
* 知识库列表响应
*/
export interface DatasetListResponse {
data: Dataset[];
has_more: boolean;
limit: number;
total: number;
page: number;
}
// ==================== 文档相关类型 ====================
/**
* 文档信息
*/
export interface Document {
id: string;
position: number;
data_source_type: string;
data_source_info: {
upload_file_id: string;
};
dataset_process_rule_id: string;
name: string;
created_from: string;
created_by: string;
created_at: number;
tokens: number;
indexing_status: 'waiting' | 'parsing' | 'cleaning' | 'splitting' | 'indexing' | 'completed' | 'error' | 'paused';
error?: string;
enabled: boolean;
disabled_at?: number;
disabled_by?: string;
archived: boolean;
display_status: string;
word_count: number;
hit_count: number;
}
/**
* 文档列表响应
*/
export interface DocumentListResponse {
data: Document[];
has_more: boolean;
limit: number;
total: number;
page: number;
}
/**
* 上传文件响应
*/
export interface UploadFileResponse {
id: string;
name: string;
size: number;
extension: string;
mime_type: string;
created_by: string;
created_at: number;
}
/**
* 创建文档(从上传的文件)请求参数
*/
export interface CreateDocumentByFileRequest {
indexing_technique: 'high_quality' | 'economy';
process_rule: {
rules: {
pre_processing_rules: Array<{
id: string;
enabled: boolean;
}>;
segmentation: {
separator: string;
max_tokens: number;
};
};
mode: 'automatic' | 'custom';
};
original_document_id?: string;
doc_form?: 'text_model' | 'qa_model';
doc_language?: string;
}
/**
* 创建文档响应
*/
export interface CreateDocumentResponse {
document: Document;
batch: string;
}
// ==================== 知识库检索相关类型 ====================
/**
* 知识库检索请求参数
*/
export interface RetrievalRequest {
query: string;
retrieval_model?: {
search_method?: 'semantic_search' | 'full_text_search' | 'hybrid_search';
reranking_enable?: boolean;
reranking_model?: {
reranking_provider_name: string;
reranking_model_name: string;
};
top_k?: number;
score_threshold_enabled?: boolean;
score_threshold?: number;
};
}
// ==================== 通用检索类型 ====================
/**
* 检索结果项
*/
export interface RetrievalRecord {
segment_id: string;
document_id: string;
document_name: string;
position: number;
score: number;
chunkId: string;
documentId: string;
documentName: string;
content: string;
hit_count: number;
word_count: number;
segment_position: number;
index_node_hash: string;
metadata: Record<string, any>;
score: number;
metadata?: Record<string, unknown>;
}
/**
* 知识库检索响应
* 检索响应
*/
export interface RetrievalResponse {
query: {
@@ -202,30 +28,28 @@ export interface RetrievalResponse {
records: RetrievalRecord[];
}
// ==================== 错误类型 ====================
// ==================== 通用错误类型 ====================
/**
* Dify API 错误响应
* RAG 错误响应
*/
export interface DifyErrorResponse {
export interface RAGErrorResponse {
code: string;
message: string;
status: number;
status?: number;
}
/**
* Dify API 错误
* RAG 错误
*/
export class DifyError extends Error {
export class RAGError extends Error {
code: string;
status: number;
status?: number;
constructor(error: DifyErrorResponse) {
constructor(error: RAGErrorResponse) {
super(error.message);
this.name = 'DifyError';
this.name = 'RAGError';
this.code = error.code;
this.status = error.status;
}
}