refactor(backend): incremental architecture evolution (Task 19)
- Add common/ layer for shared capabilities (LLM, RAG, document, middleware)
- Add legacy/ layer for existing business code
- Move files to new structure (controllers, routes, services)
- Update index.ts for new route registration
- System remains fully functional
This commit is contained in:
272
backend/src/common/document/ExtractionClient.ts
Normal file
272
backend/src/common/document/ExtractionClient.ts
Normal file
@@ -0,0 +1,272 @@
|
||||
import FormData from 'form-data';
|
||||
import axios from 'axios';
|
||||
|
||||
/**
|
||||
* Extraction Service Client
|
||||
* 调用Python微服务进行文档提取
|
||||
*/
|
||||
|
||||
// Root URL of the extraction microservice. Falls back to the local default
// when the EXTRACTION_SERVICE_URL environment variable is unset or empty.
const EXTRACTION_SERVICE_URL = process.env.EXTRACTION_SERVICE_URL || 'http://localhost:8000';
|
||||
|
||||
/**
 * JSON body that the `ExtractionClient` upload methods (`/api/extract*`)
 * return to their callers.
 */
export interface ExtractionResult {
  success: boolean;
  /** Extraction backend identifier: pymupdf / nougat / mammoth / direct. */
  method: string;
  /** Extracted text. */
  text: string;
  quality?: number;
  encoding?: string;
  language?: string;
  /**
   * Per-document details. Only `filename` is required; the index signature
   * lets the service attach further keys that are not listed here.
   */
  metadata: {
    filename: string;
    char_count?: number;
    line_count?: number;
    file_size?: number;
    page_count?: number;
    has_tables?: boolean;
    [key: string]: any;
  };
  /** Error text, when the service supplies one. */
  error?: string;
}
|
||||
|
||||
/**
 * HTTP client for the document-extraction microservice.
 *
 * Every upload method sends the file as a `multipart/form-data` part named
 * `file` and returns the service's JSON response body unchanged. Failures are
 * logged with `console.error` and rethrown as plain `Error`s.
 */
class ExtractionClient {
  private baseUrl: string;

  /**
   * @param baseUrl Root URL of the extraction service; defaults to
   *   `EXTRACTION_SERVICE_URL`.
   */
  constructor(baseUrl: string = EXTRACTION_SERVICE_URL) {
    this.baseUrl = baseUrl;
  }

  /**
   * Health check against `GET /api/health`.
   *
   * @returns The service's health payload.
   * @throws Error `'Extraction service is unavailable'` on any request failure.
   */
  async health(): Promise<{
    status: string;
    checks: any;
    timestamp: string;
  }> {
    try {
      const response = await axios.get(`${this.baseUrl}/api/health`);
      return response.data;
    } catch (error) {
      console.error('[ExtractionClient] Health check failed:', error);
      throw new Error('Extraction service is unavailable');
    }
  }

  /**
   * Generic extraction endpoint (`POST /api/extract`). Per the original
   * comment, the service detects the file type and picks the extractor itself.
   *
   * @param file Raw file contents.
   * @param filename Name sent with the multipart part.
   * @throws Error `'Extraction failed: <detail>'` when the service answered
   *   with an error response, otherwise `'Document extraction failed'`.
   */
  async extractDocument(
    file: Buffer,
    filename: string
  ): Promise<ExtractionResult> {
    return this.uploadFile<ExtractionResult>('/api/extract', file, filename, {
      timeout: 120000, // 2 minutes
      logLabel: 'Extract failed',
      detailPrefix: 'Extraction failed',
      failureMessage: 'Document extraction failed',
    });
  }

  /**
   * PDF-specific extraction (`POST /api/extract/pdf`).
   *
   * @param file Raw PDF contents.
   * @param filename Name sent with the multipart part.
   * @param method Optional extractor choice, sent as a `method` form field
   *   only when provided.
   * @throws Error `'PDF extraction failed: <detail>'` when the service answered
   *   with an error response, otherwise `'PDF extraction failed'`.
   */
  async extractPdf(
    file: Buffer,
    filename: string,
    method?: 'auto' | 'nougat' | 'pymupdf'
  ): Promise<ExtractionResult> {
    return this.uploadFile<ExtractionResult>('/api/extract/pdf', file, filename, {
      timeout: 180000, // 3 minutes (Nougat is slow)
      logLabel: 'PDF extract failed',
      detailPrefix: 'PDF extraction failed',
      failureMessage: 'PDF extraction failed',
      fields: method ? { method } : undefined,
    });
  }

  /**
   * Docx-specific extraction (`POST /api/extract/docx`).
   *
   * @throws Error `'Docx extraction failed: <detail>'` when the service
   *   answered with an error response, otherwise `'Docx extraction failed'`.
   */
  async extractDocx(
    file: Buffer,
    filename: string
  ): Promise<ExtractionResult> {
    return this.uploadFile<ExtractionResult>('/api/extract/docx', file, filename, {
      timeout: 60000, // 1 minute
      logLabel: 'Docx extract failed',
      detailPrefix: 'Docx extraction failed',
      failureMessage: 'Docx extraction failed',
    });
  }

  /**
   * Txt-specific extraction (`POST /api/extract/txt`).
   *
   * @throws Error `'Txt extraction failed: <detail>'` when the service
   *   answered with an error response, otherwise `'Txt extraction failed'`.
   */
  async extractTxt(
    file: Buffer,
    filename: string
  ): Promise<ExtractionResult> {
    return this.uploadFile<ExtractionResult>('/api/extract/txt', file, filename, {
      timeout: 30000, // 30 seconds
      logLabel: 'Txt extract failed',
      detailPrefix: 'Txt extraction failed',
      failureMessage: 'Txt extraction failed',
    });
  }

  /**
   * Language detection for a PDF (`POST /api/detect-language`).
   *
   * @throws Error `'Language detection failed'` on any failure; the service's
   *   error detail is logged but not included in the message.
   */
  async detectLanguage(
    file: Buffer,
    filename: string
  ): Promise<{
    language: string;
    chinese_ratio: number;
    chinese_chars: number;
    total_chars: number;
  }> {
    return this.uploadFile('/api/detect-language', file, filename, {
      timeout: 30000,
      logLabel: 'Language detection failed',
      failureMessage: 'Language detection failed',
    });
  }

  /**
   * Asks the service which PDF processing strategy it recommends
   * (`POST /api/pdf-strategy`).
   *
   * @throws Error `'Get PDF strategy failed'` on any failure; the service's
   *   error detail is logged but not included in the message.
   */
  async getPdfStrategy(
    file: Buffer,
    filename: string
  ): Promise<{
    detected_language: string;
    recommended_method: string;
    reason: string;
    nougat_available: boolean;
  }> {
    return this.uploadFile('/api/pdf-strategy', file, filename, {
      timeout: 30000,
      logLabel: 'Get PDF strategy failed',
      failureMessage: 'Get PDF strategy failed',
    });
  }

  /**
   * Shared multipart upload used by every endpoint above: builds the form,
   * posts it, returns the response body, and maps failures to `Error`s.
   *
   * @param path Endpoint path appended to the base URL.
   * @param options.timeout Request timeout in milliseconds.
   * @param options.logLabel Text placed after `[ExtractionClient] ` in the log line.
   * @param options.detailPrefix When set, an error response from the service
   *   is reported as `<detailPrefix>: <detail>`.
   * @param options.failureMessage Message used for every other failure.
   * @param options.fields Extra form fields appended after the file part.
   */
  private async uploadFile<T>(
    path: string,
    file: Buffer,
    filename: string,
    options: {
      timeout: number;
      logLabel: string;
      failureMessage: string;
      detailPrefix?: string;
      fields?: Record<string, string>;
    }
  ): Promise<T> {
    try {
      const formData = new FormData();
      formData.append('file', file, filename);

      for (const [key, value] of Object.entries(options.fields ?? {})) {
        formData.append(key, value);
      }

      const response = await axios.post<T>(`${this.baseUrl}${path}`, formData, {
        headers: {
          ...formData.getHeaders(),
        },
        timeout: options.timeout,
      });

      return response.data;
    } catch (error) {
      console.error(`[ExtractionClient] ${options.logLabel}:`, error);

      if (options.detailPrefix !== undefined && axios.isAxiosError(error) && error.response) {
        const detail = ExtractionClient.describeFailure(error.response.data, error.message);
        throw new Error(`${options.detailPrefix}: ${detail}`);
      }

      throw new Error(options.failureMessage);
    }
  }

  /**
   * Picks the human-readable part of an error response body.
   *
   * Tolerates a null/absent/non-object body (falls back to `fallback`) and
   * serializes a non-string `detail` as JSON so it does not render as
   * `[object Object]`.
   */
  private static describeFailure(data: unknown, fallback: string): string {
    const detail = (data as { detail?: unknown } | null | undefined)?.detail;
    if (!detail) {
      return fallback;
    }
    return typeof detail === 'string' ? detail : JSON.stringify(detail);
  }
}
|
||||
|
||||
// 导出单例
|
||||
// Shared module-level instance built with the default base URL
// (EXTRACTION_SERVICE_URL). The class is not exported in the visible source,
// so this instance is how other modules reach the client.
export const extractionClient = new ExtractionClient();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
150
backend/src/common/llm/adapters/DeepSeekAdapter.ts
Normal file
150
backend/src/common/llm/adapters/DeepSeekAdapter.ts
Normal file
@@ -0,0 +1,150 @@
|
||||
import axios from 'axios';
|
||||
import { ILLMAdapter, Message, LLMOptions, LLMResponse, StreamChunk } from './types.js';
|
||||
import { config } from '../../../config/env.js';
|
||||
|
||||
/**
 * Adapter for the DeepSeek `/chat/completions` HTTP endpoint.
 *
 * Offers a single-shot call ({@link DeepSeekAdapter.chat}) and a
 * server-sent-events stream ({@link DeepSeekAdapter.chatStream}). The API key
 * is read from `config.deepseekApiKey` when the adapter is constructed.
 */
export class DeepSeekAdapter implements ILLMAdapter {
  modelName: string;
  private apiKey: string;
  private baseURL: string;

  /**
   * @param modelName Model identifier sent in the request body.
   * @throws Error when no DeepSeek API key is configured.
   */
  constructor(modelName: string = 'deepseek-chat') {
    this.modelName = modelName;
    this.apiKey = config.deepseekApiKey || '';
    this.baseURL = 'https://api.deepseek.com/v1';

    if (!this.apiKey) {
      throw new Error('DeepSeek API key is not configured');
    }
  }

  /**
   * Non-streaming call.
   *
   * @param messages Conversation sent as-is to the API.
   * @param options Sampling overrides; defaults are temperature 0.7,
   *   maxTokens 2000, topP 0.9.
   * @returns The first choice's content, the model name reported by the API,
   *   token usage when the response carries it, and the finish reason.
   * @throws Error prefixed with `DeepSeek API调用失败: ` for axios failures;
   *   other errors (including a response without choices) are rethrown as-is.
   */
  async chat(messages: Message[], options?: LLMOptions): Promise<LLMResponse> {
    try {
      const response = await axios.post(
        `${this.baseURL}/chat/completions`,
        this.buildRequestBody(messages, options, false),
        {
          headers: this.buildHeaders(),
          timeout: 180000, // 180 s (3 min) - manuscript evaluation needs longer
        }
      );

      // Guard the response shape instead of failing with an opaque TypeError.
      const choice = response.data?.choices?.[0];
      if (!choice) {
        throw new Error('DeepSeek API returned no choices');
      }

      return {
        content: choice.message.content,
        model: response.data.model,
        usage: this.toUsage(response.data.usage),
        finishReason: choice.finish_reason,
      };
    } catch (error: unknown) {
      console.error('DeepSeek API Error:', error);
      if (axios.isAxiosError(error)) {
        throw new Error(
          `DeepSeek API调用失败: ${error.response?.data?.error?.message || error.message}`
        );
      }
      throw error;
    }
  }

  /**
   * Streaming call. Yields one {@link StreamChunk} per parsed `data: ` line.
   *
   * Bytes are decoded with a streaming UTF-8 decoder, so a multi-byte
   * character split across two network chunks is not corrupted. A final line
   * that arrives without a trailing newline is still processed.
   *
   * @param onChunk Optional callback invoked with each chunk before it is
   *   yielded. An exception thrown by the callback is logged and does not
   *   stop the stream or suppress the chunk.
   * @throws Error prefixed with `DeepSeek流式调用失败: ` for axios failures;
   *   other errors are rethrown as-is.
   */
  async *chatStream(
    messages: Message[],
    options?: LLMOptions,
    onChunk?: (chunk: StreamChunk) => void
  ): AsyncGenerator<StreamChunk, void, unknown> {
    try {
      const response = await axios.post(
        `${this.baseURL}/chat/completions`,
        this.buildRequestBody(messages, options, true),
        {
          headers: this.buildHeaders(),
          responseType: 'stream',
          timeout: 60000,
        }
      );

      const stream = response.data;
      const decoder = new TextDecoder('utf-8');
      let buffer = '';

      for await (const chunk of stream) {
        buffer += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
        const lines = buffer.split('\n');
        // The last element is an incomplete line; keep it for the next chunk.
        buffer = lines.pop() || '';

        yield* this.emitLines(lines, onChunk);
      }

      // Flush any bytes still held by the decoder and the unterminated tail.
      buffer += decoder.decode();
      yield* this.emitLines([buffer], onChunk);
    } catch (error) {
      console.error('DeepSeek Stream Error:', error);
      if (axios.isAxiosError(error)) {
        throw new Error(
          `DeepSeek流式调用失败: ${error.response?.data?.error?.message || error.message}`
        );
      }
      throw error;
    }
  }

  /** Request body shared by both call styles; only `stream` differs. */
  private buildRequestBody(messages: Message[], options: LLMOptions | undefined, stream: boolean) {
    return {
      model: this.modelName,
      messages: messages,
      temperature: options?.temperature ?? 0.7,
      max_tokens: options?.maxTokens ?? 2000,
      top_p: options?.topP ?? 0.9,
      stream,
    };
  }

  /** JSON content type plus bearer authorization. */
  private buildHeaders() {
    return {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${this.apiKey}`,
    };
  }

  /** Maps the API's snake_case usage object; `undefined` when absent. */
  private toUsage(raw: any): LLMResponse['usage'] {
    if (!raw) {
      return undefined;
    }
    return {
      promptTokens: raw.prompt_tokens,
      completionTokens: raw.completion_tokens,
      totalTokens: raw.total_tokens,
    };
  }

  /** Parses each line, notifies `onChunk`, and yields the resulting chunks. */
  private *emitLines(
    lines: string[],
    onChunk?: (chunk: StreamChunk) => void
  ): Generator<StreamChunk, void, unknown> {
    for (const line of lines) {
      const streamChunk = this.parseStreamLine(line);
      if (!streamChunk) {
        continue;
      }

      if (onChunk) {
        try {
          onChunk(streamChunk);
        } catch (callbackError) {
          // Kept best-effort, but reported as what it is rather than as a parse error.
          console.error('DeepSeek onChunk callback failed:', callbackError);
        }
      }

      yield streamChunk;
    }
  }

  /**
   * Converts one SSE line into a chunk. Returns `undefined` for blank lines,
   * the `data: [DONE]` sentinel, non-`data: ` lines, and payloads that cannot
   * be parsed (those are logged).
   */
  private parseStreamLine(line: string): StreamChunk | undefined {
    const trimmedLine = line.trim();
    if (!trimmedLine || trimmedLine === 'data: [DONE]') {
      return undefined;
    }
    if (!trimmedLine.startsWith('data: ')) {
      return undefined;
    }

    try {
      const data = JSON.parse(trimmedLine.slice(6));
      const choice = data.choices[0];

      // NOTE(review): `done` is only set for finish_reason 'stop'; other
      // finish reasons end the stream without a done chunk — confirm intended.
      const streamChunk: StreamChunk = {
        content: choice.delta?.content || '',
        done: choice.finish_reason === 'stop',
        model: data.model,
      };

      if (choice.finish_reason === 'stop' && data.usage) {
        streamChunk.usage = {
          promptTokens: data.usage.prompt_tokens,
          completionTokens: data.usage.completion_tokens,
          totalTokens: data.usage.total_tokens,
        };
      }

      return streamChunk;
    } catch (parseError) {
      console.error('Failed to parse SSE data:', parseError);
      return undefined;
    }
  }
}
|
||||
|
||||
84
backend/src/common/llm/adapters/LLMFactory.ts
Normal file
84
backend/src/common/llm/adapters/LLMFactory.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
import { ILLMAdapter, ModelType } from './types.js';
|
||||
import { DeepSeekAdapter } from './DeepSeekAdapter.js';
|
||||
import { QwenAdapter } from './QwenAdapter.js';
|
||||
|
||||
/**
 * Factory for LLM adapters.
 *
 * Creates the adapter that matches a model type and caches it, so each model
 * type is backed by a single adapter instance until the cache is cleared.
 */
export class LLMFactory {
  private static adapters: Map<string, ILLMAdapter> = new Map();

  /**
   * Single source of truth for the model list reported by `isSupported` and
   * `getSupportedModels`.
   * NOTE(review): 'gemini-pro' is listed here although `getAdapter` throws
   * for it — kept as in the original; confirm whether it should be reported.
   */
  private static readonly SUPPORTED_MODELS: readonly ModelType[] = [
    'deepseek-v3',
    'qwen3-72b',
    'qwen-long',
    'gemini-pro',
  ];

  /**
   * Returns the cached adapter for a model type, creating it on first use.
   *
   * The class is referenced by name rather than through `this`, so the
   * method keeps working when passed around detached from the class.
   *
   * @param modelType Model type to resolve.
   * @returns The adapter instance for that model type.
   * @throws Error for 'gemini-pro' (not implemented) and for unknown types;
   *   adapter constructors may also throw (e.g. missing API key).
   */
  static getAdapter(modelType: ModelType): ILLMAdapter {
    const cached = LLMFactory.adapters.get(modelType);
    if (cached) {
      return cached;
    }

    const adapter = LLMFactory.createAdapter(modelType);
    LLMFactory.adapters.set(modelType, adapter);
    return adapter;
  }

  /**
   * Drops cached adapters.
   *
   * @param modelType When given, only that model's adapter is removed;
   *   otherwise the whole cache is cleared.
   */
  static clearCache(modelType?: ModelType): void {
    if (modelType) {
      LLMFactory.adapters.delete(modelType);
    } else {
      LLMFactory.adapters.clear();
    }
  }

  /**
   * @param modelType Arbitrary string to test.
   * @returns Whether the string is one of the listed model types.
   */
  static isSupported(modelType: string): boolean {
    return (LLMFactory.SUPPORTED_MODELS as readonly string[]).includes(modelType);
  }

  /**
   * @returns A fresh array of the listed model types; mutating it does not
   *   affect the factory.
   */
  static getSupportedModels(): ModelType[] {
    return [...LLMFactory.SUPPORTED_MODELS];
  }

  /** Builds a new adapter for the model type (no caching). */
  private static createAdapter(modelType: ModelType): ILLMAdapter {
    switch (modelType) {
      case 'deepseek-v3':
        return new DeepSeekAdapter('deepseek-chat');

      case 'qwen3-72b':
        return new QwenAdapter('qwen-plus'); // model name used for Qwen3-72B

      case 'qwen-long':
        return new QwenAdapter('qwen-long'); // long-text model (1M context per the original note)

      case 'gemini-pro':
        // TODO: implement the Gemini adapter
        throw new Error('Gemini adapter is not implemented yet');

      default:
        throw new Error(`Unsupported model type: ${modelType}`);
    }
  }
}
|
||||
|
||||
|
||||
|
||||
|
||||
171
backend/src/common/llm/adapters/QwenAdapter.ts
Normal file
171
backend/src/common/llm/adapters/QwenAdapter.ts
Normal file
@@ -0,0 +1,171 @@
|
||||
import axios from 'axios';
|
||||
import { ILLMAdapter, Message, LLMOptions, LLMResponse, StreamChunk } from './types.js';
|
||||
import { config } from '../../../config/env.js';
|
||||
|
||||
/**
 * Adapter for the DashScope text-generation endpoint used for Qwen models.
 *
 * Offers a single-shot call ({@link QwenAdapter.chat}) and a
 * server-sent-events stream ({@link QwenAdapter.chatStream}). The API key is
 * read from `config.dashscopeApiKey` when the adapter is constructed.
 */
export class QwenAdapter implements ILLMAdapter {
  modelName: string;
  private apiKey: string;
  private baseURL: string;

  /**
   * @param modelName Model identifier sent in the request body.
   * @throws Error when no DashScope API key is configured.
   */
  constructor(modelName: string = 'qwen-turbo') {
    this.modelName = modelName;
    this.apiKey = config.dashscopeApiKey || '';
    this.baseURL = 'https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation';

    if (!this.apiKey) {
      throw new Error('DashScope API key is not configured. Please set DASHSCOPE_API_KEY in .env file.');
    }
  }

  /**
   * Non-streaming call.
   *
   * @param messages Conversation sent as-is to the API.
   * @param options Sampling overrides; defaults are temperature 0.7,
   *   maxTokens 2000, topP 0.9.
   * @returns The first choice's content, this adapter's model name, token
   *   usage when the response carries it, and the finish reason.
   * @throws Error prefixed with `Qwen API调用失败: ` for axios failures; other
   *   errors (including a response without choices) are rethrown as-is.
   */
  async chat(messages: Message[], options?: LLMOptions): Promise<LLMResponse> {
    try {
      const response = await axios.post(
        this.baseURL,
        this.buildRequestBody(messages, options, false),
        {
          headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${this.apiKey}`,
          },
          timeout: 180000, // 180 s (3 min) - manuscript evaluation needs longer
        }
      );

      // Guard the response shape instead of failing with an opaque TypeError.
      const choice = response.data?.output?.choices?.[0];
      if (!choice) {
        throw new Error('Qwen API returned no choices');
      }

      return {
        content: choice.message.content,
        model: this.modelName,
        usage: this.toUsage(response.data.usage),
        finishReason: choice.finish_reason,
      };
    } catch (error: unknown) {
      console.error('Qwen API Error:', error);
      if (axios.isAxiosError(error)) {
        throw new Error(
          `Qwen API调用失败: ${error.response?.data?.message || error.message}`
        );
      }
      throw error;
    }
  }

  /**
   * Streaming call. Yields one {@link StreamChunk} per parsed `data:` line.
   *
   * Bytes are decoded with a streaming UTF-8 decoder, so a multi-byte
   * character split across two network chunks is not corrupted. A final line
   * that arrives without a trailing newline is still processed.
   *
   * @param onChunk Optional callback invoked with each chunk before it is
   *   yielded. An exception thrown by the callback is logged and does not
   *   stop the stream or suppress the chunk.
   * @throws Error prefixed with `Qwen流式调用失败: ` for axios failures; other
   *   errors are rethrown as-is.
   */
  async *chatStream(
    messages: Message[],
    options?: LLMOptions,
    onChunk?: (chunk: StreamChunk) => void
  ): AsyncGenerator<StreamChunk, void, unknown> {
    try {
      // qwen-long gets a longer timeout (full-text mode may send ~750K tokens).
      const timeout = this.modelName === 'qwen-long' ? 300000 : 60000; // 5 min vs 1 min

      console.log(`[QwenAdapter] 开始流式调用`, {
        model: this.modelName,
        timeout: `${timeout / 1000}秒`,
        messagesCount: messages.length,
      });

      const response = await axios.post(
        this.baseURL,
        this.buildRequestBody(messages, options, true),
        {
          headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${this.apiKey}`,
            'X-DashScope-SSE': 'enable',
          },
          responseType: 'stream',
          timeout: timeout,
        }
      );

      const stream = response.data;
      const decoder = new TextDecoder('utf-8');
      let buffer = '';

      for await (const chunk of stream) {
        buffer += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
        const lines = buffer.split('\n');
        // The last element is an incomplete line; keep it for the next chunk.
        buffer = lines.pop() || '';

        yield* this.emitLines(lines, onChunk);
      }

      // Flush any bytes still held by the decoder and the unterminated tail.
      buffer += decoder.decode();
      yield* this.emitLines([buffer], onChunk);
    } catch (error) {
      console.error('Qwen Stream Error:', error);
      if (axios.isAxiosError(error)) {
        throw new Error(
          `Qwen流式调用失败: ${error.response?.data?.message || error.message}`
        );
      }
      throw error;
    }
  }

  /**
   * Request body shared by both call styles; the streaming form additionally
   * sets `incremental_output: true`.
   */
  private buildRequestBody(messages: Message[], options: LLMOptions | undefined, streaming: boolean) {
    const parameters = {
      temperature: options?.temperature ?? 0.7,
      max_tokens: options?.maxTokens ?? 2000,
      top_p: options?.topP ?? 0.9,
      result_format: 'message',
    };

    return {
      model: this.modelName,
      input: {
        messages: messages,
      },
      parameters: streaming ? { ...parameters, incremental_output: true } : parameters,
    };
  }

  /**
   * Maps the API's usage object; `undefined` when absent. The total falls
   * back to input + output when `total_tokens` is missing or zero.
   */
  private toUsage(raw: any): LLMResponse['usage'] {
    if (!raw) {
      return undefined;
    }
    return {
      promptTokens: raw.input_tokens,
      completionTokens: raw.output_tokens,
      totalTokens: raw.total_tokens || raw.input_tokens + raw.output_tokens,
    };
  }

  /** Parses each line, notifies `onChunk`, and yields the resulting chunks. */
  private *emitLines(
    lines: string[],
    onChunk?: (chunk: StreamChunk) => void
  ): Generator<StreamChunk, void, unknown> {
    for (const line of lines) {
      const streamChunk = this.parseStreamLine(line);
      if (!streamChunk) {
        continue;
      }

      if (onChunk) {
        try {
          onChunk(streamChunk);
        } catch (callbackError) {
          // Kept best-effort, but reported as what it is rather than as a parse error.
          console.error('Qwen onChunk callback failed:', callbackError);
        }
      }

      yield streamChunk;
    }
  }

  /**
   * Converts one SSE line into a chunk. Returns `undefined` for blank lines,
   * `:` comment lines, lines that do not start with `data:`, and payloads
   * that cannot be parsed (those are logged).
   */
  private parseStreamLine(line: string): StreamChunk | undefined {
    const trimmedLine = line.trim();
    if (!trimmedLine || trimmedLine.startsWith(':')) {
      return undefined;
    }
    if (!trimmedLine.startsWith('data:')) {
      return undefined;
    }

    try {
      const data = JSON.parse(trimmedLine.slice(5).trim());
      const choice = data.output.choices[0];

      // NOTE(review): `done` is only set for finish_reason 'stop'; other
      // finish reasons end the stream without a done chunk — confirm intended.
      const streamChunk: StreamChunk = {
        content: choice.message?.content || '',
        done: choice.finish_reason === 'stop',
        model: this.modelName,
      };

      if (choice.finish_reason === 'stop' && data.usage) {
        streamChunk.usage = {
          promptTokens: data.usage.input_tokens,
          completionTokens: data.usage.output_tokens,
          totalTokens: data.usage.total_tokens || data.usage.input_tokens + data.usage.output_tokens,
        };
      }

      return streamChunk;
    } catch (parseError) {
      console.error('Failed to parse Qwen SSE data:', parseError);
      return undefined;
    }
  }
}
|
||||
|
||||
58
backend/src/common/llm/adapters/types.ts
Normal file
58
backend/src/common/llm/adapters/types.ts
Normal file
@@ -0,0 +1,58 @@
|
||||
// LLM适配器类型定义
|
||||
|
||||
/** One conversation turn passed to an LLM adapter. */
export interface Message {
  /** Speaker of the turn. */
  role: 'system' | 'user' | 'assistant';
  /** Text of the turn. */
  content: string;
}
|
||||
|
||||
/**
 * Optional per-call overrides. The DeepSeek and Qwen adapters fall back to
 * temperature 0.7, maxTokens 2000 and topP 0.9 when a field is omitted.
 */
export interface LLMOptions {
  temperature?: number;
  /** Sent to the provider as `max_tokens`. */
  maxTokens?: number;
  /** Sent to the provider as `top_p`. */
  topP?: number;
  /**
   * NOTE(review): the DeepSeek and Qwen adapters do not read this flag; they
   * pick streaming by which method is called.
   */
  stream?: boolean;
}
|
||||
|
||||
/** Result of a non-streaming `chat` call. */
export interface LLMResponse {
  /** Generated text. */
  content: string;
  /** Model name attached to the response by the adapter. */
  model: string;
  /** Token accounting, when available. */
  usage?: {
    promptTokens: number;
    completionTokens: number;
    totalTokens: number;
  };
  /** Provider's finish reason, passed through unchanged. */
  finishReason?: string;
}
|
||||
|
||||
/** One increment produced by `chatStream`. */
export interface StreamChunk {
  /** Text carried by this increment (may be empty). */
  content: string;
  /** The adapters set this when the provider's finish reason is 'stop'. */
  done: boolean;
  model?: string;
  /** Token accounting; the adapters attach it only to a 'stop' chunk that reports usage. */
  usage?: {
    promptTokens: number;
    completionTokens: number;
    totalTokens: number;
  };
}
|
||||
|
||||
/** Contract every LLM adapter implements. */
export interface ILLMAdapter {
  /** Model name the adapter sends to its provider. */
  modelName: string;

  /** Non-streaming call: resolves with the complete response. */
  chat(messages: Message[], options?: LLMOptions): Promise<LLMResponse>;

  /**
   * Streaming call: yields chunks as they arrive. `onChunk`, when given, is
   * an additional per-chunk callback.
   */
  chatStream(
    messages: Message[],
    options?: LLMOptions,
    onChunk?: (chunk: StreamChunk) => void
  ): AsyncGenerator<StreamChunk, void, unknown>;
}
|
||||
|
||||
/** Model identifiers accepted by the LLM factory. */
export type ModelType =
  | 'deepseek-v3'
  | 'qwen3-72b'
  | 'qwen-long'
  | 'gemini-pro';
|
||||
|
||||
|
||||
|
||||
|
||||
113
backend/src/common/middleware/validateProject.ts
Normal file
113
backend/src/common/middleware/validateProject.ts
Normal file
@@ -0,0 +1,113 @@
|
||||
import { FastifyRequest, FastifyReply } from 'fastify';
|
||||
|
||||
/** Request body expected by `validateProjectCreate`. */
interface CreateProjectBody {
  /** Required; the validator rejects blank names and names over 100 characters. */
  name: string;
  /** Optional; the validator rejects values over 2000 characters. */
  background?: string;
  /** Required. */
  researchType: 'observational' | 'interventional';
}
|
||||
|
||||
/** Request body expected by `validateProjectUpdate`; every field is optional. */
interface UpdateProjectBody {
  name?: string;
  background?: string;
  researchType?: 'observational' | 'interventional';
}
|
||||
|
||||
/**
 * Validates a create-project request body.
 *
 * On the first failed rule it replies with HTTP 400 and
 * `{ success: false, message }`; when every rule passes it returns without
 * touching the reply. A missing or null body is handled like an empty one,
 * so it yields the "name is required" 400 instead of a TypeError.
 * Presumably registered as a Fastify preHandler — confirm at the route.
 *
 * Rules, in order: name required and a string; name not blank; name at most
 * 100 characters; researchType required; researchType one of
 * observational/interventional; background (optional) at most 2000 characters.
 */
export async function validateProjectCreate(request: FastifyRequest, reply: FastifyReply) {
  const body = (request.body ?? {}) as Partial<CreateProjectBody>;
  const reject = (message: string) => reply.code(400).send({ success: false, message });

  // Required name.
  if (!body.name || typeof body.name !== 'string') {
    return reject('项目名称为必填项');
  }
  if (body.name.trim().length === 0) {
    return reject('项目名称不能为空');
  }
  if (body.name.length > 100) {
    return reject('项目名称不能超过100个字符');
  }

  // Required research type.
  if (!body.researchType) {
    return reject('研究类型为必填项');
  }
  if (!['observational', 'interventional'].includes(body.researchType)) {
    return reject('研究类型必须是observational或interventional');
  }

  // Optional background, with a length cap.
  if (body.background && body.background.length > 2000) {
    return reject('项目背景不能超过2000个字符');
  }
}
|
||||
|
||||
/**
 * Validates an update-project request body.
 *
 * On the first failed rule it replies with HTTP 400 and
 * `{ success: false, message }`; when every rule passes it returns without
 * touching the reply. A missing or null body is handled like an empty one.
 * Presumably registered as a Fastify preHandler — confirm at the route.
 *
 * Field presence: `name` and `background` count as provided whenever they
 * are not null/undefined, so an empty `background` (clearing the text) is a
 * valid update and an empty `name` gets the "name must not be empty" message
 * instead of "no fields provided". `researchType` still counts only when
 * truthy, as before.
 */
export async function validateProjectUpdate(request: FastifyRequest, reply: FastifyReply) {
  const body = (request.body ?? {}) as UpdateProjectBody;
  const reject = (message: string) => reply.code(400).send({ success: false, message });

  // At least one field must be supplied.
  const nothingProvided = body.name == null && body.background == null && !body.researchType;
  if (nothingProvided) {
    return reject('至少需要提供一个要更新的字段');
  }

  // Name, when supplied.
  if (body.name !== undefined) {
    if (typeof body.name !== 'string' || body.name.trim().length === 0) {
      return reject('项目名称不能为空');
    }
    if (body.name.length > 100) {
      return reject('项目名称不能超过100个字符');
    }
  }

  // Research type, when supplied.
  if (body.researchType && !['observational', 'interventional'].includes(body.researchType)) {
    return reject('研究类型必须是observational或interventional');
  }

  // Background, when supplied.
  if (body.background && body.background.length > 2000) {
    return reject('项目背景不能超过2000个字符');
  }
}
|
||||
|
||||
|
||||
|
||||
|
||||
323
backend/src/common/rag/DifyClient.ts
Normal file
323
backend/src/common/rag/DifyClient.ts
Normal file
@@ -0,0 +1,323 @@
|
||||
import axios, { AxiosInstance, AxiosError } from 'axios';
|
||||
import FormData from 'form-data';
|
||||
import {
|
||||
Dataset,
|
||||
CreateDatasetRequest,
|
||||
CreateDatasetResponse,
|
||||
DatasetListResponse,
|
||||
Document,
|
||||
DocumentListResponse,
|
||||
CreateDocumentByFileRequest,
|
||||
CreateDocumentResponse,
|
||||
RetrievalRequest,
|
||||
RetrievalResponse,
|
||||
DifyError,
|
||||
DifyErrorResponse,
|
||||
} from './types.js';
|
||||
import { config } from '../../config/env.js';
|
||||
|
||||
/**
 * Dify API client.
 *
 * Wraps the Dify knowledge-base (dataset) endpoints: dataset management,
 * document management, retrieval, and helpers that wait for indexing.
 */
export class DifyClient {
  private client: AxiosInstance;
  private apiKey: string;
  private apiUrl: string;

  /**
   * @param apiKey Dify API key; defaults to `config.difyApiKey`.
   * @param apiUrl Dify API base URL; defaults to `config.difyApiUrl`.
   * @throws Error when either value ends up empty.
   */
  constructor(apiKey?: string, apiUrl?: string) {
    this.apiKey = apiKey || config.difyApiKey;
    this.apiUrl = apiUrl || config.difyApiUrl;

    if (!this.apiKey) {
      throw new Error('Dify API Key is required');
    }

    if (!this.apiUrl) {
      throw new Error('Dify API URL is required');
    }

    // Shared axios instance: base URL, bearer auth and JSON content type.
    this.client = axios.create({
      baseURL: this.apiUrl,
      headers: {
        'Authorization': `Bearer ${this.apiKey}`,
        'Content-Type': 'application/json',
      },
      timeout: 30000, // 30 seconds
    });

    // Response interceptor: error responses that carry a body are rethrown as
    // DifyError (code, message, HTTP status); everything else passes through.
    this.client.interceptors.response.use(
      (response) => response,
      (error: AxiosError) => {
        if (error.response?.data) {
          const errorData = error.response.data as DifyErrorResponse;
          throw new DifyError({
            code: errorData.code || 'UNKNOWN_ERROR',
            message: errorData.message || error.message,
            status: error.response.status,
          });
        }
        throw error;
      }
    );
  }

  // ==================== Dataset management ====================

  /**
   * Creates a dataset (`POST /datasets`).
   *
   * @param params Creation parameters.
   * @returns The created dataset as returned by the API.
   */
  async createDataset(params: CreateDatasetRequest): Promise<CreateDatasetResponse> {
    const response = await this.client.post<CreateDatasetResponse>('/datasets', params);
    return response.data;
  }

  /**
   * Lists datasets (`GET /datasets`).
   *
   * @param page Page number, starting at 1.
   * @param limit Page size (default 20).
   */
  async getDatasets(page: number = 1, limit: number = 20): Promise<DatasetListResponse> {
    const response = await this.client.get<DatasetListResponse>('/datasets', {
      params: { page, limit },
    });
    return response.data;
  }

  /**
   * Fetches one dataset (`GET /datasets/{datasetId}`).
   *
   * @param datasetId Dataset ID.
   */
  async getDataset(datasetId: string): Promise<Dataset> {
    const response = await this.client.get<Dataset>(`/datasets/${datasetId}`);
    return response.data;
  }

  /**
   * Deletes a dataset (`DELETE /datasets/{datasetId}`).
   *
   * @param datasetId Dataset ID.
   */
  async deleteDataset(datasetId: string): Promise<void> {
    await this.client.delete(`/datasets/${datasetId}`);
  }

  // ==================== Document management ====================

  /**
   * Uploads a file straight into a dataset (simplified flow).
   *
   * The file is sent as the multipart part `file`; the indexing settings are
   * sent as a JSON string in the part `data`. `params` is shallow-merged over
   * the defaults, so a supplied `process_rule` replaces the default one whole.
   *
   * @param datasetId Dataset ID.
   * @param file File contents.
   * @param filename File name.
   * @param params Overrides for the default indexing settings.
   * @returns The API's create-document response.
   */
  async uploadDocumentDirectly(
    datasetId: string,
    file: Buffer,
    filename: string,
    params?: Partial<CreateDocumentByFileRequest>
  ): Promise<CreateDocumentResponse> {
    const formData = new FormData();
    formData.append('file', file, filename);

    // Default indexing settings, overridable through `params`.
    const defaultParams = {
      indexing_technique: 'high_quality',
      process_rule: {
        mode: 'automatic',
        rules: {
          pre_processing_rules: [
            { id: 'remove_extra_spaces', enabled: true },
            { id: 'remove_urls_emails', enabled: false },
          ],
          segmentation: {
            separator: '\n',
            max_tokens: 1500, // Phase 1 tuning: raised from 500 to 1500 tokens
          },
        },
      },
      ...params,
    };

    formData.append('data', JSON.stringify(defaultParams));

    const response = await this.client.post<CreateDocumentResponse>(
      `/datasets/${datasetId}/document/create_by_file`,
      formData,
      {
        headers: {
          // Multipart headers (with boundary) for this request.
          ...formData.getHeaders(),
          'Authorization': `Bearer ${this.apiKey}`,
        },
      }
    );

    return response.data;
  }

  /**
   * Lists the documents of a dataset.
   *
   * @param datasetId Dataset ID.
   * @param page Page number, starting at 1.
   * @param limit Page size (default 20).
   */
  async getDocuments(
    datasetId: string,
    page: number = 1,
    limit: number = 20
  ): Promise<DocumentListResponse> {
    const response = await this.client.get<DocumentListResponse>(
      `/datasets/${datasetId}/documents`,
      {
        params: { page, limit },
      }
    );
    return response.data;
  }

  /**
   * Fetches one document.
   *
   * @param datasetId Dataset ID.
   * @param documentId Document ID.
   */
  async getDocument(datasetId: string, documentId: string): Promise<Document> {
    const response = await this.client.get<Document>(
      `/datasets/${datasetId}/documents/${documentId}`
    );
    return response.data;
  }

  /**
   * Deletes a document.
   *
   * @param datasetId Dataset ID.
   * @param documentId Document ID.
   */
  async deleteDocument(datasetId: string, documentId: string): Promise<void> {
    await this.client.delete(`/datasets/${datasetId}/documents/${documentId}`);
  }

  /**
   * Updates a document (re-index) by posting to its `/processing` endpoint.
   *
   * @param datasetId Dataset ID.
   * @param documentId Document ID.
   */
  async updateDocument(datasetId: string, documentId: string): Promise<void> {
    await this.client.post(`/datasets/${datasetId}/documents/${documentId}/processing`);
  }

  // ==================== Retrieval ====================

  /**
   * Queries a dataset.
   *
   * Defaults: semantic search, no reranking, top_k 3, no score threshold.
   * Only `params.retrieval_model` is merged over those defaults; other keys
   * of `params` are not forwarded.
   *
   * @param datasetId Dataset ID.
   * @param query Query text.
   * @param params Retrieval overrides.
   */
  async retrieveKnowledge(
    datasetId: string,
    query: string,
    params?: Partial<RetrievalRequest>
  ): Promise<RetrievalResponse> {
    const requestParams: RetrievalRequest = {
      query,
      retrieval_model: {
        search_method: 'semantic_search',
        reranking_enable: false,
        top_k: 3,
        score_threshold_enabled: false,
        ...params?.retrieval_model,
      },
    };

    const response = await this.client.post<RetrievalResponse>(
      `/datasets/${datasetId}/retrieve`,
      requestParams
    );

    return response.data;
  }

  // ==================== Helpers ====================

  /**
   * Polls a document until indexing finishes.
   *
   * The document is fetched up to `maxAttempts` times with `interval`
   * milliseconds between polls. No delay is added after the final poll, so
   * the timeout is raised as soon as the last status is known.
   *
   * @param datasetId Dataset ID.
   * @param documentId Document ID.
   * @param maxAttempts Maximum number of polls (default 30).
   * @param interval Delay between polls in milliseconds (default 2000).
   * @returns The document once its `indexing_status` is `'completed'`.
   * @throws Error when the status is `'error'`, or `'Document processing
   *   timeout'` when the polls are exhausted.
   */
  async waitForDocumentProcessing(
    datasetId: string,
    documentId: string,
    maxAttempts: number = 30,
    interval: number = 2000
  ): Promise<Document> {
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      const document = await this.getDocument(datasetId, documentId);

      if (document.indexing_status === 'completed') {
        return document;
      }

      if (document.indexing_status === 'error') {
        throw new Error(`Document processing failed: ${document.error || 'Unknown error'}`);
      }

      // Wait only when another poll will follow.
      if (attempt < maxAttempts) {
        await new Promise((resolve) => setTimeout(resolve, interval));
      }
    }

    throw new Error('Document processing timeout');
  }

  /**
   * Uploads a file and waits until its indexing completes.
   *
   * @param datasetId Dataset ID.
   * @param file File contents.
   * @param filename File name.
   * @returns The fully processed document.
   */
  async uploadAndProcessDocument(
    datasetId: string,
    file: Buffer,
    filename: string
  ): Promise<Document> {
    // 1. Upload the file.
    const createResult = await this.uploadDocumentDirectly(datasetId, file, filename);

    // 2. Wait for indexing to finish (default polling settings).
    const document = await this.waitForDocumentProcessing(
      datasetId,
      createResult.document.id
    );

    return document;
  }
}
|
||||
|
||||
// 导出单例实例
|
||||
// Shared DifyClient instance, constructed once at module load with the constructor's default arguments.
export const difyClient = new DifyClient();
|
||||
|
||||
231
backend/src/common/rag/types.ts
Normal file
231
backend/src/common/rag/types.ts
Normal file
@@ -0,0 +1,231 @@
|
||||
/**
|
||||
* Dify API 类型定义
|
||||
*/
|
||||
|
||||
// ==================== 知识库相关类型 ====================
|
||||
|
||||
/**
 * Knowledge base (dataset) record as typed for the Dify API.
 */
export interface Dataset {
  // --- identity ---
  id: string;
  name: string;
  description: string;

  // --- configuration ---
  permission: 'only_me' | 'all_team_members';
  data_source_type: 'upload_file' | 'notion_import' | 'website_crawl';
  indexing_technique: 'high_quality' | 'economy';

  // --- counters ---
  app_count: number;
  document_count: number;
  word_count: number;

  // --- audit fields (timestamps are numeric; presumably epoch values — confirm against the API) ---
  created_by: string;
  created_at: number;
  updated_by: string;
  updated_at: number;
}
|
||||
|
||||
/**
 * Request parameters for creating a knowledge base.
 *
 * Only `name` is required; every other setting is optional.
 */
export interface CreateDatasetRequest {
  name: string;
  description?: string;
  permission?: 'only_me' | 'all_team_members';
  indexing_technique?: 'high_quality' | 'economy';

  // Embedding model selection.
  embedding_model?: string;
  embedding_model_provider?: string;

  // Retrieval settings; when present, `search_method` is mandatory.
  retrieval_model?: {
    search_method: 'semantic_search' | 'full_text_search' | 'hybrid_search';
    reranking_enable?: boolean;
    reranking_model?: {
      reranking_provider_name: string;
      reranking_model_name: string;
    };
    top_k?: number;
    score_threshold_enabled?: boolean;
    score_threshold?: number;
  };
}
|
||||
|
||||
/**
 * Response returned after creating a knowledge base.
 *
 * `permission`, `data_source_type` and `indexing_technique` are typed as plain
 * strings here rather than as literal unions.
 */
export interface CreateDatasetResponse {
  id: string;
  name: string;
  description: string;

  permission: string;
  data_source_type: string;
  indexing_technique: string;

  created_by: string;
  created_at: number;
}
|
||||
|
||||
/**
 * Paginated list of knowledge bases.
 */
export interface DatasetListResponse {
  /** Knowledge bases on the current page. */
  data: Array<Dataset>;

  // Pagination fields.
  page: number;
  limit: number;
  total: number;
  has_more: boolean;
}
|
||||
|
||||
// ==================== 文档相关类型 ====================
|
||||
|
||||
/**
 * Document record inside a knowledge base.
 */
export interface Document {
  // --- identity and origin ---
  id: string;
  name: string;
  position: number;
  data_source_type: string;
  data_source_info: {
    upload_file_id: string;
  };
  dataset_process_rule_id: string;
  created_from: string;
  created_by: string;
  created_at: number;

  // --- indexing ---
  tokens: number;
  indexing_status:
    | 'waiting'
    | 'parsing'
    | 'cleaning'
    | 'splitting'
    | 'indexing'
    | 'completed'
    | 'error'
    | 'paused';
  /** Optional error text. */
  error?: string;

  // --- availability ---
  enabled: boolean;
  disabled_at?: number;
  disabled_by?: string;
  archived: boolean;
  display_status: string;

  // --- statistics ---
  word_count: number;
  hit_count: number;
}
|
||||
|
||||
/**
 * Paginated list of documents.
 */
export interface DocumentListResponse {
  /** Documents on the current page. */
  data: Array<Document>;

  // Pagination fields.
  page: number;
  limit: number;
  total: number;
  has_more: boolean;
}
|
||||
|
||||
/**
 * Response returned after uploading a file.
 */
export interface UploadFileResponse {
  id: string;

  // File description.
  name: string;
  extension: string;
  mime_type: string;
  size: number;

  // Audit fields.
  created_by: string;
  created_at: number;
}
|
||||
|
||||
/**
 * Request parameters for creating a document from an uploaded file.
 */
export interface CreateDocumentByFileRequest {
  indexing_technique: 'high_quality' | 'economy';

  /** Processing rules: a mode plus the pre-processing and segmentation rules. */
  process_rule: {
    mode: 'automatic' | 'custom';
    rules: {
      pre_processing_rules: {
        id: string;
        enabled: boolean;
      }[];
      segmentation: {
        separator: string;
        max_tokens: number;
      };
    };
  };

  original_document_id?: string;
  doc_form?: 'text_model' | 'qa_model';
  doc_language?: string;
}
|
||||
|
||||
/**
 * Response returned after creating a document.
 */
export interface CreateDocumentResponse {
  /** Batch value returned together with the document. */
  batch: string;
  /** The newly created document. */
  document: Document;
}
|
||||
|
||||
// ==================== 知识库检索相关类型 ====================
|
||||
|
||||
/**
 * Request parameters for querying a knowledge base.
 *
 * `query` is required; every retrieval setting is optional.
 */
export interface RetrievalRequest {
  query: string;

  retrieval_model?: {
    search_method?: 'semantic_search' | 'full_text_search' | 'hybrid_search';

    // Reranking.
    reranking_enable?: boolean;
    reranking_model?: {
      reranking_provider_name: string;
      reranking_model_name: string;
    };

    // Result limits.
    top_k?: number;
    score_threshold_enabled?: boolean;
    score_threshold?: number;
  };
}
|
||||
|
||||
/**
 * A single retrieval result item.
 */
export interface RetrievalRecord {
  // --- where the hit came from ---
  segment_id: string;
  document_id: string;
  document_name: string;
  position: number;
  segment_position: number;

  // --- the hit itself ---
  score: number;
  content: string;

  // --- segment statistics ---
  hit_count: number;
  word_count: number;
  index_node_hash: string;

  /** Free-form metadata; its shape is not constrained by this type. */
  metadata: { [key: string]: any };
}
|
||||
|
||||
/**
 * Response of a knowledge base retrieval call.
 */
export interface RetrievalResponse {
  /** Query information; `content` holds the query text. */
  query: {
    content: string;
  };
  /** Result items. */
  records: Array<RetrievalRecord>;
}
|
||||
|
||||
// ==================== 错误类型 ====================
|
||||
|
||||
/**
 * Error payload of the Dify API.
 */
export interface DifyErrorResponse {
  code: string;
  message: string;
  status: number;
}

/**
 * Error class for Dify API failures.
 *
 * Carries the `code` and `status` of a {@link DifyErrorResponse}; the payload's
 * `message` becomes the standard `Error.message`.
 */
export class DifyError extends Error {
  code: string;
  status: number;

  /**
   * @param error Error payload to wrap
   */
  constructor(error: DifyErrorResponse) {
    super(error.message);

    // When compiled to ES5, calling `super()` on the built-in Error returns a
    // plain Error, so `instanceof DifyError` would be false. Restoring the
    // prototype keeps instanceof checks working on every compile target.
    Object.setPrototypeOf(this, new.target.prototype);

    this.name = 'DifyError';
    this.code = error.code;
    this.status = error.status;
  }
}
|
||||
|
||||
|
||||
152
backend/src/common/utils/jsonParser.ts
Normal file
152
backend/src/common/utils/jsonParser.ts
Normal file
@@ -0,0 +1,152 @@
|
||||
/**
|
||||
* Phase 3: 批处理模式 - JSON解析工具
|
||||
*
|
||||
* AI的输出可能包含额外的文字说明,需要提取JSON块并解析
|
||||
*/
|
||||
|
||||
/**
 * Outcome of parsing JSON out of model output.
 *
 * @typeParam T Type of the parsed payload
 */
export interface ParseResult<T = any> {
  /** Whether parsing succeeded. */
  success: boolean;
  /** Parsed payload, when available. */
  data?: T;
  /** Failure description, when available. */
  error?: string;
  /** The raw output text the parse was attempted on. */
  rawOutput: string;
}
|
||||
|
||||
/**
 * Return the index of the bracket that closes the one at `start`, or -1.
 * Brackets inside double-quoted strings are ignored.
 */
function findBalancedEnd(text: string, start: number): number {
  let depth = 0;
  let inString = false;

  for (let i = start; i < text.length; i++) {
    const ch = text[i];

    if (inString) {
      if (ch === '\\') {
        i++; // skip the escaped character
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }

    if (ch === '"') {
      inString = true;
    } else if (ch === '{' || ch === '[') {
      depth++;
    } else if (ch === '}' || ch === ']') {
      depth--;
      if (depth === 0) {
        return i;
      }
    }
  }

  return -1;
}

/**
 * Return the first top-level balanced `{...}` / `[...]` span that JSON.parse accepts, or null.
 */
function findFirstParsableSpan(text: string): string | null {
  let cursor = 0;

  while (cursor < text.length) {
    const ch = text[cursor];
    if (ch !== '{' && ch !== '[') {
      cursor++;
      continue;
    }

    const end = findBalancedEnd(text, cursor);
    if (end === -1) {
      // Unbalanced opener (e.g. truncated output): stop here so a nested
      // fragment is never mistaken for the complete result.
      return null;
    }

    const candidate = text.slice(cursor, end + 1);
    try {
      JSON.parse(candidate);
      return candidate;
    } catch {
      // Balanced but not JSON (e.g. "{field: value}" in prose): look further.
      cursor = end + 1;
    }
  }

  return null;
}

/**
 * Extract the JSON block from AI output.
 *
 * Supported shapes:
 * 1. Pure JSON: { "key": "value" }
 * 2. With a preamble: some text followed by { "key": "value" }
 * 3. With a suffix: { "key": "value" } followed by some text
 * 4. Fenced code block: ```json\n{ "key": "value" }\n```
 *
 * Resolution order:
 * 1. The first top-level balanced object/array that parses as JSON. This copes
 *    with prose that itself contains brackets and with several JSON values in
 *    one output, where a greedy first-to-last match yields unparsable text.
 * 2. The greedy first-to-last bracket match, kept as a fallback so inputs with
 *    no parsable span produce the same string as before.
 * 3. The trimmed content of the first fenced code block.
 *
 * @param text Raw model output
 * @returns The extracted text, or null when nothing matches
 */
export function extractJSON(text: string): string | null {
  const parsable = findFirstParsableSpan(text);
  if (parsable !== null) {
    return parsable;
  }

  // Fallback 1: greedy match from the first opener to the last closer.
  const greedyMatch = text.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
  if (greedyMatch) {
    return greedyMatch[0];
  }

  // Fallback 2: content of a fenced code block.
  const fencedMatch = text.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
  if (fencedMatch) {
    return fencedMatch[1].trim();
  }

  return null;
}
|
||||
|
||||
/**
 * Extract and parse JSON from a raw output string.
 *
 * The text is first passed through `extractJSON`, then through `JSON.parse`.
 * When `expectedFields` is given, every listed key missing from the parsed
 * value is reported with a warning and filled with the placeholder '未提取到'.
 * Failures never throw: they are returned as `success: false` with the message.
 *
 * @param jsonString Raw text that should contain JSON
 * @param expectedFields Optional list of keys the parsed value should have
 * @returns The parse outcome, always including the original text as `rawOutput`
 */
export function parseJSON<T = any>(
  jsonString: string,
  expectedFields?: string[]
): ParseResult<T> {
  // Builds the failure result; the raw input is always echoed back.
  const failure = (message: string): ParseResult<T> => ({
    success: false,
    error: message,
    rawOutput: jsonString,
  });

  try {
    const jsonText = extractJSON(jsonString);
    if (!jsonText) {
      return failure('未找到JSON格式的数据');
    }

    const parsed = JSON.parse(jsonText) as T;

    if (Array.isArray(expectedFields)) {
      const record = parsed as any;
      const absent = expectedFields.filter((name) => !(name in record));

      if (absent.length > 0) {
        console.warn(`[JsonParser] 缺少字段: ${absent.join(', ')}`);
        // Fill every missing key with the placeholder value.
        absent.forEach((name) => {
          record[name] = '未提取到';
        });
      }
    }

    return { success: true, data: parsed, rawOutput: jsonString };
  } catch (error: any) {
    return failure(error.message);
  }
}
|
||||
|
||||
/**
 * Check parsed data against a template's field definitions.
 *
 * Validation is lenient: an empty field (undefined, null or '') only produces
 * a warning, never an error. For fields declared as `number`, a non-blank
 * string that converts to a number is replaced in place by that number.
 * Empty values, blank strings and non-string values are left untouched, so a
 * missing value is never silently turned into 0.
 *
 * Note: `data` is mutated when a numeric string is converted.
 *
 * @param data Parsed data
 * @param templateFields Template field definitions
 * @returns `valid: false` with an error only when `data` is not an object
 */
export function validateTemplateData(
  data: any,
  templateFields: Array<{ key: string; type: string }>
): { valid: boolean; errors: string[] } {
  const errors: string[] = [];

  if (!data || typeof data !== 'object') {
    errors.push('数据不是有效的对象');
    return { valid: false, errors };
  }

  for (const field of templateFields) {
    const value = data[field.key];

    // An empty field is a warning only, and there is nothing to convert.
    if (value === undefined || value === null || value === '') {
      console.warn(`[JsonParser] 字段 ${field.key} 为空`);
      continue;
    }

    // Lenient type check: only non-blank strings are converted. Number(null),
    // Number('  ') and Number([]) are all 0, which would fabricate a value.
    if (field.type === 'number' && typeof value === 'string' && value.trim() !== '') {
      const converted = Number(value);
      if (!Number.isNaN(converted)) {
        data[field.key] = converted;
      }
    }
  }

  return { valid: errors.length === 0, errors };
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user