Files
AIclinicalresearch/backend/src/common/rag/DifyClient.ts
HaHafeng 0c5310fb77 refactor(backend): incremental architecture evolution (Task 19)
- Add common/ layer for shared capabilities (LLM, RAG, document, middleware)
- Add legacy/ layer for existing business code
- Move files to new structure (controllers, routes, services)
- Update index.ts for new route registration
- System remains fully functional
2025-11-16 15:42:44 +08:00

324 lines
8.1 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import axios, { AxiosInstance, AxiosError } from 'axios';
import FormData from 'form-data';
import {
Dataset,
CreateDatasetRequest,
CreateDatasetResponse,
DatasetListResponse,
Document,
DocumentListResponse,
CreateDocumentByFileRequest,
CreateDocumentResponse,
RetrievalRequest,
RetrievalResponse,
DifyError,
DifyErrorResponse,
} from './types.js';
import { config } from '../../config/env.js';
/**
 * Dify API client.
 *
 * Wraps the Dify knowledge-base (dataset) HTTP API: dataset management,
 * document management, retrieval, plus helpers that poll until a document
 * has finished indexing.
 *
 * Every request goes through one axios instance configured with the API
 * base URL, a bearer token and a 30-second timeout. Failed requests whose
 * response carries a body are rethrown as `DifyError`; all other failures
 * (no response, empty body) surface as the original axios error.
 */
export class DifyClient {
  private readonly client: AxiosInstance;
  private readonly apiKey: string;
  private readonly apiUrl: string;

  /**
   * @param apiKey Dify API key; falls back to `config.difyApiKey` when omitted or empty.
   * @param apiUrl Dify API base URL; falls back to `config.difyApiUrl` when omitted or empty.
   * @throws Error when no API key or no API URL can be resolved.
   */
  constructor(apiKey?: string, apiUrl?: string) {
    this.apiKey = apiKey || config.difyApiKey;
    this.apiUrl = apiUrl || config.difyApiUrl;

    if (!this.apiKey) {
      throw new Error('Dify API Key is required');
    }
    if (!this.apiUrl) {
      throw new Error('Dify API URL is required');
    }

    // Shared axios instance used by every method of this client.
    this.client = axios.create({
      baseURL: this.apiUrl,
      headers: {
        'Authorization': `Bearer ${this.apiKey}`,
        'Content-Type': 'application/json',
      },
      timeout: 30000, // 30-second timeout
    });

    // Response interceptor: centralised error handling.
    this.client.interceptors.response.use(
      (response) => response,
      (error: AxiosError) => {
        if (error.response?.data) {
          // The server answered with an error body: normalise it to a DifyError,
          // filling in defaults for any field the body does not provide.
          const errorData = error.response.data as DifyErrorResponse;
          throw new DifyError({
            code: errorData.code || 'UNKNOWN_ERROR',
            message: errorData.message || error.message,
            status: error.response.status,
          });
        }
        // No response body (e.g. no response at all): propagate the axios error unchanged.
        throw error;
      }
    );
  }

  /**
   * Percent-encodes an ID so that it is always sent as exactly one URL path
   * segment. Without this, an ID containing `/`, `?`, `#` or `..` would
   * address a different endpoint than the one the caller asked for.
   */
  private static pathSegment(id: string): string {
    return encodeURIComponent(id);
  }

  // ==================== Dataset (knowledge base) management API ====================

  /**
   * Creates a dataset (knowledge base).
   *
   * @param params Creation parameters, sent as the JSON request body.
   * @returns The response body describing the created dataset.
   */
  async createDataset(params: CreateDatasetRequest): Promise<CreateDatasetResponse> {
    const response = await this.client.post<CreateDatasetResponse>('/datasets', params);
    return response.data;
  }

  /**
   * Lists datasets.
   *
   * @param page Page number, starting at 1.
   * @param limit Page size; defaults to 20.
   * @returns The dataset list response body.
   */
  async getDatasets(page: number = 1, limit: number = 20): Promise<DatasetListResponse> {
    const response = await this.client.get<DatasetListResponse>('/datasets', {
      params: { page, limit },
    });
    return response.data;
  }

  /**
   * Fetches a single dataset.
   *
   * @param datasetId Dataset ID.
   * @returns The dataset.
   */
  async getDataset(datasetId: string): Promise<Dataset> {
    const response = await this.client.get<Dataset>(
      `/datasets/${DifyClient.pathSegment(datasetId)}`
    );
    return response.data;
  }

  /**
   * Deletes a dataset.
   *
   * @param datasetId Dataset ID.
   */
  async deleteDataset(datasetId: string): Promise<void> {
    await this.client.delete(`/datasets/${DifyClient.pathSegment(datasetId)}`);
  }

  // ==================== Document management API ====================

  /**
   * Uploads a file straight into a dataset (simplified variant).
   *
   * The request is multipart: the file goes in the `file` part and the
   * JSON-serialised indexing options go in the `data` part.
   *
   * @param datasetId Dataset ID.
   * @param file File contents.
   * @param filename File name reported to the API.
   * @param params Optional overrides, shallow-merged over the defaults: any
   *   top-level key supplied here replaces the corresponding default wholesale.
   * @returns The response body describing the created document.
   */
  async uploadDocumentDirectly(
    datasetId: string,
    file: Buffer,
    filename: string,
    params?: Partial<CreateDocumentByFileRequest>
  ): Promise<CreateDocumentResponse> {
    const formData = new FormData();
    formData.append('file', file, filename);

    // Default indexing options; caller-supplied params are spread last and win.
    const defaultParams = {
      indexing_technique: 'high_quality',
      process_rule: {
        mode: 'automatic',
        rules: {
          pre_processing_rules: [
            { id: 'remove_extra_spaces', enabled: true },
            { id: 'remove_urls_emails', enabled: false },
          ],
          segmentation: {
            separator: '\n',
            max_tokens: 1500, // Phase 1 optimisation: raised from 500 to 1500 tokens
          },
        },
      },
      ...params,
    };
    formData.append('data', JSON.stringify(defaultParams));

    const response = await this.client.post<CreateDocumentResponse>(
      `/datasets/${DifyClient.pathSegment(datasetId)}/document/create_by_file`,
      formData,
      {
        headers: {
          // Headers generated by form-data for this multipart body take the
          // place of the instance-level JSON content type for this request.
          ...formData.getHeaders(),
          'Authorization': `Bearer ${this.apiKey}`,
        },
      }
    );
    return response.data;
  }

  /**
   * Lists the documents of a dataset.
   *
   * @param datasetId Dataset ID.
   * @param page Page number, starting at 1.
   * @param limit Page size; defaults to 20.
   * @returns The document list response body.
   */
  async getDocuments(
    datasetId: string,
    page: number = 1,
    limit: number = 20
  ): Promise<DocumentListResponse> {
    const response = await this.client.get<DocumentListResponse>(
      `/datasets/${DifyClient.pathSegment(datasetId)}/documents`,
      {
        params: { page, limit },
      }
    );
    return response.data;
  }

  /**
   * Fetches a single document.
   *
   * @param datasetId Dataset ID.
   * @param documentId Document ID.
   * @returns The document.
   */
  async getDocument(datasetId: string, documentId: string): Promise<Document> {
    const response = await this.client.get<Document>(
      `/datasets/${DifyClient.pathSegment(datasetId)}/documents/${DifyClient.pathSegment(documentId)}`
    );
    return response.data;
  }

  /**
   * Deletes a document.
   *
   * @param datasetId Dataset ID.
   * @param documentId Document ID.
   */
  async deleteDocument(datasetId: string, documentId: string): Promise<void> {
    await this.client.delete(
      `/datasets/${DifyClient.pathSegment(datasetId)}/documents/${DifyClient.pathSegment(documentId)}`
    );
  }

  /**
   * Updates a document (re-index) by POSTing, without a body, to the
   * document's `processing` endpoint.
   *
   * @param datasetId Dataset ID.
   * @param documentId Document ID.
   */
  async updateDocument(datasetId: string, documentId: string): Promise<void> {
    await this.client.post(
      `/datasets/${DifyClient.pathSegment(datasetId)}/documents/${DifyClient.pathSegment(documentId)}/processing`
    );
  }

  // ==================== Retrieval API ====================

  /**
   * Queries a dataset.
   *
   * Defaults: semantic search, reranking disabled, top 3 results, score
   * threshold disabled. Fields of `params.retrieval_model` override these
   * defaults one by one; only `retrieval_model` is read from `params`.
   *
   * @param datasetId Dataset ID.
   * @param query Query text.
   * @param params Optional retrieval settings.
   * @returns The retrieval response body.
   */
  async retrieveKnowledge(
    datasetId: string,
    query: string,
    params?: Partial<RetrievalRequest>
  ): Promise<RetrievalResponse> {
    const requestParams: RetrievalRequest = {
      query,
      retrieval_model: {
        search_method: 'semantic_search',
        reranking_enable: false,
        top_k: 3,
        score_threshold_enabled: false,
        ...params?.retrieval_model,
      },
    };

    const response = await this.client.post<RetrievalResponse>(
      `/datasets/${DifyClient.pathSegment(datasetId)}/retrieve`,
      requestParams
    );
    return response.data;
  }

  // ==================== Helpers ====================

  /**
   * Polls a document until its indexing has finished.
   *
   * The document is fetched up to `maxAttempts` times, waiting `interval`
   * milliseconds between consecutive fetches. There is no wait after the
   * final fetch: once the last attempt has been made the timeout error is
   * raised immediately.
   *
   * @param datasetId Dataset ID.
   * @param documentId Document ID.
   * @param maxAttempts Maximum number of fetches; defaults to 30.
   * @param interval Delay between fetches in milliseconds; defaults to 2000.
   * @returns The document once its `indexing_status` is `completed`.
   * @throws Error when `indexing_status` is `error`, or when the status is
   *   still not `completed` after `maxAttempts` fetches.
   */
  async waitForDocumentProcessing(
    datasetId: string,
    documentId: string,
    maxAttempts: number = 30,
    interval: number = 2000
  ): Promise<Document> {
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      const document = await this.getDocument(datasetId, documentId);

      if (document.indexing_status === 'completed') {
        return document;
      }
      if (document.indexing_status === 'error') {
        throw new Error(`Document processing failed: ${document.error || 'Unknown error'}`);
      }

      // Pause only when another attempt will follow; sleeping after the last
      // one would just delay the timeout error by a full interval.
      if (attempt < maxAttempts) {
        await new Promise((resolve) => setTimeout(resolve, interval));
      }
    }
    throw new Error('Document processing timeout');
  }

  /**
   * One-step upload: uploads a file into a dataset and waits, using the
   * default polling settings, until it has been processed.
   *
   * @param datasetId Dataset ID.
   * @param file File contents.
   * @param filename File name.
   * @returns The fully processed document.
   */
  async uploadAndProcessDocument(
    datasetId: string,
    file: Buffer,
    filename: string
  ): Promise<Document> {
    // 1. Upload the file.
    const createResult = await this.uploadDocumentDirectly(datasetId, file, filename);

    // 2. Wait for indexing to complete.
    const document = await this.waitForDocumentProcessing(
      datasetId,
      createResult.document.id
    );
    return document;
  }
}
// Exported singleton instance. It is constructed without arguments, so the API
// key and URL are taken from `config`; the constructor throws if either is
// missing, which means that error surfaces when this module is first loaded.
export const difyClient = new DifyClient();