import axios, { AxiosInstance, AxiosError } from 'axios';
import FormData from 'form-data';
import {
  Dataset,
  CreateDatasetRequest,
  CreateDatasetResponse,
  DatasetListResponse,
  Document,
  DocumentListResponse,
  CreateDocumentByFileRequest,
  CreateDocumentResponse,
  RetrievalRequest,
  RetrievalResponse,
  DifyError,
  DifyErrorResponse,
} from './types.js';
import { config } from '../../config/env.js';

/**
 * Encodes a caller-supplied identifier for use as a single URL path segment.
 *
 * Without this, an id containing `/`, `?` or `#` would change which endpoint
 * the request hits. UUID-style ids pass through unchanged.
 */
function pathSegment(id: string): string {
  return encodeURIComponent(id);
}

/**
 * Dify API client.
 *
 * Wraps the Dify knowledge-base (dataset) HTTP endpoints: dataset management,
 * document management, retrieval, and a couple of polling helpers.
 *
 * Every request goes through one axios instance configured with the bearer
 * token, a JSON content type and a 30 second timeout. Failed responses that
 * carry a body are rethrown as {@link DifyError}; failures without a response
 * body (network errors, timeouts) are rethrown as the original axios error.
 */
export class DifyClient {
  private readonly client: AxiosInstance;
  private readonly apiKey: string;
  private readonly apiUrl: string;

  /**
   * @param apiKey Dify API key; falls back to `config.difyApiKey` when omitted or empty.
   * @param apiUrl Dify API base URL; falls back to `config.difyApiUrl` when omitted or empty.
   * @throws Error when no API key or no API URL is available after applying the fallbacks.
   */
  constructor(apiKey?: string, apiUrl?: string) {
    // `||` (not `??`) is deliberate: an empty string also falls back to config.
    this.apiKey = apiKey || config.difyApiKey;
    this.apiUrl = apiUrl || config.difyApiUrl;

    if (!this.apiKey) {
      throw new Error('Dify API Key is required');
    }

    if (!this.apiUrl) {
      throw new Error('Dify API URL is required');
    }

    // Shared axios instance for all requests.
    this.client = axios.create({
      baseURL: this.apiUrl,
      headers: {
        'Authorization': `Bearer ${this.apiKey}`,
        'Content-Type': 'application/json',
      },
      timeout: 30000, // 30 second timeout
    });

    // Response interceptor: unified error handling.
    this.client.interceptors.response.use(
      (response) => response,
      (error: AxiosError) => {
        if (error.response?.data) {
          // The server answered with a body: surface its code/message, falling
          // back to a generic code and the axios message when fields are absent.
          const errorData = error.response.data as DifyErrorResponse;
          throw new DifyError({
            code: errorData.code || 'UNKNOWN_ERROR',
            message: errorData.message || error.message,
            status: error.response.status,
          });
        }
        // No response body: propagate the axios error untouched.
        throw error;
      }
    );
  }

  // ==================== Dataset management API ====================

  /**
   * Creates a dataset (knowledge base) via `POST /datasets`.
   *
   * @param params Creation parameters, sent as the JSON request body.
   * @returns The response body describing the created dataset.
   */
  async createDataset(params: CreateDatasetRequest): Promise<CreateDatasetResponse> {
    const response = await this.client.post<CreateDatasetResponse>('/datasets', params);
    return response.data;
  }

  /**
   * Lists datasets via `GET /datasets`.
   *
   * @param page Page number (starts at 1).
   * @param limit Page size (defaults to 20).
   * @returns The dataset list response body.
   */
  async getDatasets(page: number = 1, limit: number = 20): Promise<DatasetListResponse> {
    const response = await this.client.get<DatasetListResponse>('/datasets', {
      params: { page, limit },
    });
    return response.data;
  }

  /**
   * Fetches a single dataset via `GET /datasets/{datasetId}`.
   *
   * @param datasetId Dataset id.
   * @returns The dataset.
   */
  async getDataset(datasetId: string): Promise<Dataset> {
    const response = await this.client.get<Dataset>(`/datasets/${pathSegment(datasetId)}`);
    return response.data;
  }

  /**
   * Deletes a dataset via `DELETE /datasets/{datasetId}`.
   *
   * @param datasetId Dataset id.
   */
  async deleteDataset(datasetId: string): Promise<void> {
    await this.client.delete(`/datasets/${pathSegment(datasetId)}`);
  }

  // ==================== Document management API ====================

  /**
   * Uploads a file straight into a dataset (simplified variant).
   *
   * Sends a multipart form with the file under `file` and the JSON-encoded
   * processing options under `data`.
   *
   * @param datasetId Dataset id.
   * @param file File contents.
   * @param filename File name reported in the multipart part.
   * @param params Optional overrides. They are shallow-merged over the defaults,
   *   so supplying `process_rule` replaces the whole default rule object.
   * @returns The response body describing the created document.
   */
  async uploadDocumentDirectly(
    datasetId: string,
    file: Buffer,
    filename: string,
    params?: Partial<CreateDocumentByFileRequest>
  ): Promise<CreateDocumentResponse> {
    const formData = new FormData();
    formData.append('file', file, filename);

    // Default processing options; caller-supplied keys win.
    const defaultParams = {
      indexing_technique: 'high_quality',
      process_rule: {
        mode: 'automatic',
        rules: {
          pre_processing_rules: [
            { id: 'remove_extra_spaces', enabled: true },
            { id: 'remove_urls_emails', enabled: false },
          ],
          segmentation: {
            separator: '\n',
            max_tokens: 1500, // Phase 1 tuning: raised from 500 to 1500 tokens
          },
        },
      },
      ...params,
    };

    formData.append('data', JSON.stringify(defaultParams));

    const response = await this.client.post<CreateDocumentResponse>(
      `/datasets/${pathSegment(datasetId)}/document/create_by_file`,
      formData,
      {
        headers: {
          // Multipart content type (with boundary) overrides the JSON default.
          ...formData.getHeaders(),
          'Authorization': `Bearer ${this.apiKey}`,
        },
      }
    );
    return response.data;
  }

  /**
   * Lists the documents of a dataset via `GET /datasets/{datasetId}/documents`.
   *
   * @param datasetId Dataset id.
   * @param page Page number (starts at 1).
   * @param limit Page size (defaults to 20).
   * @returns The document list response body.
   */
  async getDocuments(
    datasetId: string,
    page: number = 1,
    limit: number = 20
  ): Promise<DocumentListResponse> {
    const response = await this.client.get<DocumentListResponse>(
      `/datasets/${pathSegment(datasetId)}/documents`,
      {
        params: { page, limit },
      }
    );
    return response.data;
  }

  /**
   * Fetches a single document via `GET /datasets/{datasetId}/documents/{documentId}`.
   *
   * @param datasetId Dataset id.
   * @param documentId Document id.
   * @returns The document.
   */
  async getDocument(datasetId: string, documentId: string): Promise<Document> {
    const response = await this.client.get<Document>(
      `/datasets/${pathSegment(datasetId)}/documents/${pathSegment(documentId)}`
    );
    return response.data;
  }

  /**
   * Deletes a document via `DELETE /datasets/{datasetId}/documents/{documentId}`.
   *
   * @param datasetId Dataset id.
   * @param documentId Document id.
   */
  async deleteDocument(datasetId: string, documentId: string): Promise<void> {
    await this.client.delete(
      `/datasets/${pathSegment(datasetId)}/documents/${pathSegment(documentId)}`
    );
  }

  /**
   * Updates a document (intended to trigger re-indexing) by POSTing, without a
   * body, to `/datasets/{datasetId}/documents/{documentId}/processing`.
   *
   * @param datasetId Dataset id.
   * @param documentId Document id.
   */
  async updateDocument(datasetId: string, documentId: string): Promise<void> {
    await this.client.post(
      `/datasets/${pathSegment(datasetId)}/documents/${pathSegment(documentId)}/processing`
    );
  }

  // ==================== Retrieval API ====================

  /**
   * Queries a dataset via `POST /datasets/{datasetId}/retrieve`.
   *
   * Defaults to semantic search, no reranking, `top_k` of 3 and no score
   * threshold. Only `params.retrieval_model` is read from `params`; its keys
   * override those defaults.
   *
   * @param datasetId Dataset id.
   * @param query Query text.
   * @param params Optional retrieval overrides.
   * @returns The retrieval response body.
   */
  async retrieveKnowledge(
    datasetId: string,
    query: string,
    params?: Partial<RetrievalRequest>
  ): Promise<RetrievalResponse> {
    const requestParams: RetrievalRequest = {
      query,
      retrieval_model: {
        search_method: 'semantic_search',
        reranking_enable: false,
        top_k: 3,
        score_threshold_enabled: false,
        ...params?.retrieval_model,
      },
    };

    const response = await this.client.post<RetrievalResponse>(
      `/datasets/${pathSegment(datasetId)}/retrieve`,
      requestParams
    );
    return response.data;
  }

  // ==================== Helpers ====================

  /**
   * Polls a document until its `indexing_status` is `completed`.
   *
   * @param datasetId Dataset id.
   * @param documentId Document id.
   * @param maxAttempts Maximum number of status checks (defaults to 30).
   * @param interval Delay between checks in milliseconds (defaults to 2000).
   * @returns The document once its status is `completed`.
   * @throws Error when the status becomes `error`, or when `maxAttempts` checks
   *   pass without completion (`Document processing timeout`).
   */
  async waitForDocumentProcessing(
    datasetId: string,
    documentId: string,
    maxAttempts: number = 30,
    interval: number = 2000
  ): Promise<Document> {
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      const document = await this.getDocument(datasetId, documentId);

      if (document.indexing_status === 'completed') {
        return document;
      }

      if (document.indexing_status === 'error') {
        throw new Error(`Document processing failed: ${document.error || 'Unknown error'}`);
      }

      // Sleep only when another check follows; waiting after the final check
      // would just delay the timeout error by one interval.
      if (attempt < maxAttempts) {
        await new Promise<void>((resolve) => setTimeout(resolve, interval));
      }
    }

    throw new Error('Document processing timeout');
  }

  /**
   * Uploads a file and waits for its processing to finish.
   *
   * Uses the default upload options and the default polling settings.
   *
   * @param datasetId Dataset id.
   * @param file File contents.
   * @param filename File name.
   * @returns The document once processing has completed.
   * @throws Error when processing fails or times out.
   */
  async uploadAndProcessDocument(
    datasetId: string,
    file: Buffer,
    filename: string
  ): Promise<Document> {
    // 1. Upload the document.
    const createResult = await this.uploadDocumentDirectly(datasetId, file, filename);

    // 2. Wait for processing to complete.
    const document = await this.waitForDocumentProcessing(
      datasetId,
      createResult.document.id
    );

    return document;
  }
}

// Shared singleton instance. It is constructed when this module is first
// imported, so a missing API key or URL in config throws at import time.
export const difyClient = new DifyClient();