feat(dc/tool-c): Add missing value imputation feature with 6 methods and MICE
Major features:
1. Missing value imputation (6 simple methods + MICE):
   - Mean/Median/Mode/Constant imputation
   - Forward fill (ffill) and backward fill (bfill) for time series
   - MICE multivariate imputation (in progress, shape issue to fix)
2. Auto precision detection:
   - Automatically match the decimal places of the original data
   - Prevent false precision (e.g. 13.57 instead of 13.566716417910449)
3. Categorical variable detection:
   - Auto-detect and skip categorical columns in MICE
   - Show warnings for unsuitable columns
   - Suggest mode imputation for categorical data
4. UI improvements:
   - Rename button: "Delete Missing" to "Missing Value Handling"
   - Remove standalone "Dedup" and "MICE" buttons
   - 3-tab dialog: Delete / Fill / Advanced Fill
   - Display column statistics and recommended methods
   - Extended warning messages (8 seconds for skipped columns)
5. Bug fixes:
   - Fix sessionService.updateSessionData -> saveProcessedData
   - Fix OperationResult interface (add message and stats)
   - Fix Toolbar button labels and remove obsolete buttons

Modified files:
- Python: operations/fillna.py (new, 556 lines), main.py (3 new endpoints)
- Backend: QuickActionService.ts, QuickActionController.ts, routes/index.ts
- Frontend: MissingValueDialog.tsx (new, 437 lines), Toolbar.tsx, index.tsx
- Tests: test_fillna_operations.py (774 lines), test scripts and docs
- Docs: 5 documentation files updated

Known issues:
- MICE imputation has a DataFrame shape mismatch issue (under debugging)
- Workaround: use the 6 simple imputation methods first

Status: Development complete, MICE debugging in progress
Lines added: ~2000 lines across 3 tiers
This commit is contained in:
@@ -308,4 +308,6 @@ runTests().catch((error) => {
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -249,4 +249,6 @@ runTest()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -287,4 +287,6 @@ Content-Type: application/json
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -366,4 +366,6 @@ export class ExcelExporter {
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -224,3 +224,5 @@ export const conflictDetectionService = new ConflictDetectionService();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -252,3 +252,5 @@ export const templateService = new TemplateService();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -174,3 +174,5 @@ curl -X POST http://localhost:3000/api/v1/dc/tool-c/test/execute \
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -389,6 +389,156 @@ export class QuickActionController {
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * POST /api/v1/dc/tool-c/fillna/stats
 *
 * Returns the missing-value statistics of one column.
 *
 * Reads `sessionId` and `column` from the request body, loads the session's
 * full dataset and delegates to `quickActionService.getFillnaStats`.
 *
 * Replies 200 with `{ success, stats, execution_time }` when the service
 * reports success; replies 500 with `{ success: false, error }` when the
 * service reports failure or when anything in the handler throws.
 */
async handleGetFillnaStats(request: FastifyRequest, reply: FastifyReply) {
  // Both failure paths send the same envelope, so build it in one place.
  const replyWithFailure = (reason: any) =>
    reply.code(500).send({
      success: false,
      error: reason
    });

  try {
    const payload = request.body as { sessionId: string; column: string };
    const { sessionId, column } = payload;

    logger.info(`[QuickAction] 获取缺失值统计: session=${sessionId}, column=${column}`);

    // Load the session dataset, then ask the service for the column statistics.
    const rows = await sessionService.getFullData(sessionId);
    const outcome = await quickActionService.getFillnaStats(rows, column);

    if (outcome.success) {
      return reply.code(200).send({
        success: true,
        stats: outcome.stats,
        execution_time: outcome.execution_time
      });
    }

    return replyWithFailure(outcome.error);
  } catch (error: any) {
    logger.error(`[QuickAction] 获取统计失败: ${error.message}`);
    return replyWithFailure(error.message);
  }
}
|
||||
|
||||
/**
 * POST /api/v1/dc/tool-c/fillna/simple
 *
 * Runs a simple (single-column) imputation and stores the result in the session.
 *
 * Reads `sessionId`, `column`, `newColumnName`, `method` and the optional
 * `fillValue` from the request body, loads the session's full dataset and
 * delegates to `quickActionService.executeFillnaSimple`. On success the
 * returned rows are saved via `sessionService.saveProcessedData` and the first
 * 50 rows are sent back as a preview.
 *
 * Replies 500 with `{ success: false, error }` when the service reports
 * failure, when the service reply carries no row array, or when anything in
 * the handler throws.
 */
async handleFillnaSimple(request: FastifyRequest, reply: FastifyReply) {
  try {
    const { sessionId, column, newColumnName, method, fillValue } = request.body as {
      sessionId: string;
      column: string;
      newColumnName: string;
      method: string;
      fillValue?: unknown;
    };

    logger.info(`[QuickAction] 执行简单填补: session=${sessionId}, method=${method}, column=${column}`);

    // Load the session dataset.
    const fullData = await sessionService.getFullData(sessionId);

    // Delegate the imputation to the service layer.
    const result = await quickActionService.executeFillnaSimple(fullData, {
      column,
      newColumnName,
      // The service's method union is not visible from this file, hence the cast.
      method: method as any,
      fillValue
    });

    if (!result.success) {
      return reply.code(500).send({
        success: false,
        error: result.error
      });
    }

    // Never persist a fallback empty array: a "successful" reply without
    // result_data would otherwise wipe the session's dataset.
    const resultData = result.result_data;
    if (!Array.isArray(resultData)) {
      logger.error(`[QuickAction] 简单填补失败: 服务未返回result_data,Session数据未更新`);
      return reply.code(500).send({
        success: false,
        error: '填补结果缺少数据,Session数据未更新'
      });
    }

    // Persist the imputed dataset back to the session.
    await sessionService.saveProcessedData(sessionId, resultData);

    return reply.code(200).send({
      success: true,
      data: {
        newDataPreview: resultData.slice(0, 50),
        // NOTE(review): this is the total row count of the result, not the
        // number of rows whose value was filled — confirm what the UI expects.
        affectedRows: resultData.length,
        message: result.message || '填补成功',
        stats: result.stats
      }
    });
  } catch (error: any) {
    logger.error(`[QuickAction] 简单填补失败: ${error.message}`);
    return reply.code(500).send({
      success: false,
      error: error.message
    });
  }
}
|
||||
|
||||
/**
 * POST /api/v1/dc/tool-c/fillna/mice
 *
 * Runs MICE multiple imputation over the requested columns and stores the
 * result in the session.
 *
 * Reads `sessionId`, `columns` and the optional `nIterations` / `randomState`
 * from the request body, loads the session's full dataset and delegates to
 * `quickActionService.executeFillnaMice`. On success the returned rows are
 * saved via `sessionService.saveProcessedData` and the first 50 rows are sent
 * back as a preview.
 *
 * Replies 400 when `columns` is not an array; replies 500 with
 * `{ success: false, error }` when the service reports failure, when the
 * service reply carries no row array, or when anything in the handler throws.
 */
async handleFillnaMice(request: FastifyRequest, reply: FastifyReply) {
  try {
    const { sessionId, columns, nIterations, randomState } = request.body as {
      sessionId: string;
      columns: string[];
      nIterations?: number;
      randomState?: number;
    };

    // The body is untrusted: without this check `columns.length` below throws
    // an opaque TypeError that is reported to the client as a 500.
    if (!Array.isArray(columns)) {
      logger.error(`[QuickAction] MICE填补失败: columns 参数缺失或不是数组`);
      return reply.code(400).send({
        success: false,
        error: 'columns 参数缺失或不是数组'
      });
    }

    logger.info(`[QuickAction] 执行MICE填补: session=${sessionId}, columns=${columns.length}个`);

    // Load the session dataset.
    const fullData = await sessionService.getFullData(sessionId);

    // Delegate the imputation to the service layer.
    const result = await quickActionService.executeFillnaMice(fullData, {
      columns,
      nIterations,
      randomState
    });

    if (!result.success) {
      return reply.code(500).send({
        success: false,
        error: result.error
      });
    }

    // Never persist a fallback empty array: a "successful" reply without
    // result_data would otherwise wipe the session's dataset.
    const resultData = result.result_data;
    if (!Array.isArray(resultData)) {
      logger.error(`[QuickAction] MICE填补失败: 服务未返回result_data,Session数据未更新`);
      return reply.code(500).send({
        success: false,
        error: 'MICE填补结果缺少数据,Session数据未更新'
      });
    }

    // Persist the imputed dataset back to the session.
    await sessionService.saveProcessedData(sessionId, resultData);

    return reply.code(200).send({
      success: true,
      data: {
        newDataPreview: resultData.slice(0, 50),
        // NOTE(review): this is the total row count of the result, not the
        // number of rows whose value was filled — confirm what the UI expects.
        affectedRows: resultData.length,
        message: result.message || 'MICE填补成功',
        stats: result.stats
      }
    });
  } catch (error: any) {
    logger.error(`[QuickAction] MICE填补失败: ${error.message}`);
    return reply.code(500).send({
      success: false,
      error: error.message
    });
  }
}
|
||||
}
|
||||
|
||||
// ==================== 导出单例 ====================
|
||||
|
||||
@@ -228,3 +228,5 @@ export const streamAIController = new StreamAIController();
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -116,5 +116,22 @@ export async function toolCRoutes(fastify: FastifyInstance) {
|
||||
fastify.post('/quick-action/preview', {
|
||||
handler: quickActionController.preview.bind(quickActionController),
|
||||
});
|
||||
|
||||
// ✨ 缺失值处理(新增)
|
||||
|
||||
// 获取列的缺失值统计
|
||||
fastify.post('/fillna/stats', {
|
||||
handler: quickActionController.handleGetFillnaStats.bind(quickActionController),
|
||||
});
|
||||
|
||||
// 执行简单填补
|
||||
fastify.post('/fillna/simple', {
|
||||
handler: quickActionController.handleFillnaSimple.bind(quickActionController),
|
||||
});
|
||||
|
||||
// 执行MICE多重插补
|
||||
fastify.post('/fillna/mice', {
|
||||
handler: quickActionController.handleFillnaMice.bind(quickActionController),
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -77,6 +77,19 @@ interface PivotParams {
|
||||
unusedAggMethod?: 'first' | 'mode' | 'mean'; // ✨ 新增:未选择列的聚合方式
|
||||
}
|
||||
|
||||
/**
 * Parameters of a simple (single-column) imputation request.
 *
 * `QuickActionService.executeFillnaSimple` forwards these to the Python
 * service as `column`, `new_column_name`, `method` and `fill_value`.
 */
interface FillnaSimpleParams {
  /** Column to impute. */
  column: string;
  /** Forwarded as `new_column_name`; presumably the column that receives the filled values — confirm against the Python endpoint. */
  newColumnName: string;
  /** Imputation strategy. */
  method: 'mean' | 'median' | 'mode' | 'constant' | 'ffill' | 'bfill';
  /**
   * Forwarded as `fill_value`; presumably only meaningful for `'constant'`.
   * Typed `unknown` because the value is passed through untouched and is
   * never inspected on the Node side.
   */
  fillValue?: unknown;
}
|
||||
|
||||
/**
 * Parameters of a MICE multiple-imputation request.
 *
 * `QuickActionService.executeFillnaMice` forwards these to the Python service
 * as `columns`, `n_iterations` and `random_state`.
 */
interface FillnaMiceParams {
  /** Columns to impute. */
  columns: string[];
  /** Forwarded as `n_iterations`; the service method substitutes 10 when omitted. */
  nIterations?: number;
  /** Forwarded as `random_state`; the service method substitutes 42 when omitted. */
  randomState?: number;
}
|
||||
|
||||
interface OperationResult {
|
||||
success: boolean;
|
||||
result_data?: any[];
|
||||
@@ -84,6 +97,8 @@ interface OperationResult {
|
||||
execution_time?: number;
|
||||
result_shape?: [number, number];
|
||||
error?: string;
|
||||
message?: string;
|
||||
stats?: any;
|
||||
}
|
||||
|
||||
// ==================== 服务类 ====================
|
||||
@@ -342,6 +357,104 @@ export class QuickActionService {
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fetches the missing-value statistics of one column from the Python service.
 *
 * Posts `{ data, column }` to `/api/operations/fillna-stats` with a 10 s
 * timeout and returns the response body as-is.
 *
 * Never throws: on failure it returns the upstream error body when one is
 * available, otherwise `{ success: false, error }` built from the error message.
 *
 * @param data   Rows of the dataset.
 * @param column Column to inspect.
 */
async getFillnaStats(data: any[], column: string): Promise<OperationResult> {
  try {
    logger.info(`[QuickActionService] 获取列 '${column}' 的缺失值统计`);

    const endpoint = `${PYTHON_SERVICE_URL}/api/operations/fillna-stats`;
    const payload = { data, column };
    const response = await axios.post(endpoint, payload, { timeout: 10000 });

    logger.info(`[QuickActionService] 获取统计成功`);
    return response.data;
  } catch (error: any) {
    logger.error(`[QuickActionService] 获取统计失败: ${error.message}`);

    // Prefer the body returned by the Python service when there is one.
    const upstreamBody = error.response?.data;
    if (upstreamBody) {
      return upstreamBody;
    }

    return { success: false, error: error.message || '获取统计失败' };
  }
}
|
||||
|
||||
/**
 * Runs a simple (single-column) imputation through the Python service.
 *
 * Posts the dataset together with the snake_case request fields to
 * `/api/operations/fillna-simple` with a 60 s timeout and returns the
 * response body as-is.
 *
 * Never throws: on failure it returns the upstream error body when one is
 * available, otherwise `{ success: false, error }` built from the error message.
 *
 * @param data   Rows of the dataset.
 * @param params Column, target column name, method and optional fill value.
 */
async executeFillnaSimple(data: any[], params: FillnaSimpleParams): Promise<OperationResult> {
  try {
    logger.info(`[QuickActionService] 调用简单填补API: ${params.method} on '${params.column}'`);

    const endpoint = `${PYTHON_SERVICE_URL}/api/operations/fillna-simple`;
    // The Python endpoint expects snake_case field names.
    const payload = {
      data,
      column: params.column,
      new_column_name: params.newColumnName,
      method: params.method,
      fill_value: params.fillValue,
    };
    const response = await axios.post(endpoint, payload, { timeout: 60000 });

    logger.info(`[QuickActionService] 简单填补成功`);
    return response.data;
  } catch (error: any) {
    logger.error(`[QuickActionService] 简单填补失败: ${error.message}`);

    // Prefer the body returned by the Python service when there is one.
    const upstreamBody = error.response?.data;
    if (upstreamBody) {
      return upstreamBody;
    }

    return { success: false, error: error.message || '简单填补失败' };
  }
}
|
||||
|
||||
/**
 * Runs MICE multiple imputation through the Python service.
 *
 * Posts the dataset, the target columns, the iteration count and the random
 * seed to `/api/operations/fillna-mice` with a 5 minute timeout and returns
 * the response body as-is.
 *
 * Never throws: on failure it returns the upstream error body when one is
 * available, otherwise `{ success: false, error }` built from the error message.
 *
 * @param data   Rows of the dataset.
 * @param params Columns to impute plus optional `nIterations` (default 10)
 *               and `randomState` (default 42).
 */
async executeFillnaMice(data: any[], params: FillnaMiceParams): Promise<OperationResult> {
  try {
    logger.info(`[QuickActionService] 调用MICE填补API: ${params.columns.length} 列`);

    const response = await axios.post(`${PYTHON_SERVICE_URL}/api/operations/fillna-mice`, {
      data,
      columns: params.columns,
      // `||` is deliberate here: 0 iterations is not a meaningful request,
      // so it falls back to the default like an omitted value does.
      n_iterations: params.nIterations || 10,
      // `??` rather than `||`: 0 is a legitimate seed and must not be
      // silently replaced by the default.
      random_state: params.randomState ?? 42,
    }, {
      timeout: 300000, // MICE can take much longer than the simple methods (5 minutes)
    });

    logger.info(`[QuickActionService] MICE填补成功`);
    return response.data;
  } catch (error: any) {
    logger.error(`[QuickActionService] MICE填补失败: ${error.message}`);

    // Prefer the body returned by the Python service when there is one.
    if (error.response?.data) {
      return error.response.data;
    }

    return {
      success: false,
      error: error.message || 'MICE填补失败',
    };
  }
}
|
||||
}
|
||||
|
||||
// ==================== 导出单例 ====================
|
||||
|
||||
Reference in New Issue
Block a user