feat(dc/tool-c): Add missing value imputation feature with 6 methods and MICE

Major features:
1. Missing value imputation (6 simple methods + MICE):
   - Mean/Median/Mode/Constant imputation
   - Forward fill (ffill) and Backward fill (bfill) for time series
   - MICE multivariate imputation (in progress, shape issue to fix)

2. Auto precision detection:
   - Automatically match decimal places of original data
   - Prevent false precision (e.g. 13.57 instead of 13.566716417910449)

3. Categorical variable detection:
   - Auto-detect and skip categorical columns in MICE
   - Show warnings for unsuitable columns
   - Suggest mode imputation for categorical data

4. UI improvements:
   - Rename button: "Delete Missing" to "Missing Value Handling"
   - Remove standalone "Dedup" and "MICE" buttons
   - 3-tab dialog: Delete / Fill / Advanced Fill
   - Display column statistics and recommended methods
   - Extended warning messages (8 seconds for skipped columns)

5. Bug fixes:
   - Fix sessionService.updateSessionData -> saveProcessedData
   - Fix OperationResult interface (add message and stats)
   - Fix Toolbar button labels and removal of standalone buttons

Modified files:
Python: operations/fillna.py (new, 556 lines), main.py (3 new endpoints)
Backend: QuickActionService.ts, QuickActionController.ts, routes/index.ts
Frontend: MissingValueDialog.tsx (new, 437 lines), Toolbar.tsx, index.tsx
Tests: test_fillna_operations.py (774 lines), test scripts and docs
Docs: 5 documentation files updated

Known issues:
- MICE imputation has DataFrame shape mismatch issue (under debugging)
- Workaround: Use 6 simple imputation methods first

Status: Simple imputation methods complete; MICE still being debugged
Lines added: ~2000 across 3 tiers (Python, backend, frontend)
This commit is contained in:
2025-12-10 13:06:00 +08:00
parent f4f1d09837
commit 74cf346453
102 changed files with 3806 additions and 181 deletions

View File

@@ -389,6 +389,156 @@ export class QuickActionController {
});
}
}
/**
 * POST /api/v1/dc/tool-c/fillna/stats
 *
 * Returns missing-value statistics for one column of a session's dataset.
 *
 * Responses:
 * - 400 when `sessionId` or `column` is missing from the request body.
 * - 500 when the service reports failure or any step throws.
 * - 200 with `{ success, stats, execution_time }` otherwise.
 *
 * @param request - Fastify request whose body carries `sessionId` and `column`.
 * @param reply - Fastify reply used to send the JSON response.
 */
async handleGetFillnaStats(request: FastifyRequest, reply: FastifyReply) {
  try {
    // The body may be absent entirely; fall back to an empty object so the
    // destructuring cannot throw and the checks below produce a clean 400.
    const { sessionId, column } = (request.body ?? {}) as { sessionId: string; column: string };

    if (!sessionId || !column) {
      return reply.code(400).send({
        success: false,
        error: '缺少必要参数: sessionId 或 column'
      });
    }

    logger.info(`[QuickAction] 获取缺失值统计: session=${sessionId}, column=${column}`);

    // Load the session's full dataset, then let the service compute the statistics.
    const fullData = await sessionService.getFullData(sessionId);
    const result = await quickActionService.getFillnaStats(fullData, column);

    if (!result.success) {
      return reply.code(500).send({
        success: false,
        error: result.error
      });
    }

    return reply.code(200).send({
      success: true,
      stats: result.stats,
      execution_time: result.execution_time
    });
  } catch (error: unknown) {
    // Thrown values are not guaranteed to be Error instances; narrow before reading `.message`.
    const message = error instanceof Error ? error.message : String(error);
    logger.error(`[QuickAction] 获取统计失败: ${message}`);
    return reply.code(500).send({
      success: false,
      error: message
    });
  }
}
/**
 * POST /api/v1/dc/tool-c/fillna/simple
 *
 * Runs a simple (single-column) missing-value fill through
 * `quickActionService.executeFillnaSimple` and, on success, stores the
 * returned rows as the session's processed data.
 *
 * Responses:
 * - 400 when `sessionId`, `column` or `method` is missing from the request body.
 * - 500 when the service reports failure, returns no row array, or any step throws.
 * - 200 with `data: { newDataPreview, affectedRows, message, stats }` otherwise.
 *
 * @param request - Fastify request whose body carries `sessionId`, `column`,
 *   `newColumnName`, `method` and an optional `fillValue`.
 * @param reply - Fastify reply used to send the JSON response.
 */
async handleFillnaSimple(request: FastifyRequest, reply: FastifyReply) {
  try {
    // The body may be absent entirely; fall back to an empty object so the
    // destructuring cannot throw and the checks below produce a clean 400.
    const { sessionId, column, newColumnName, method, fillValue } = (request.body ?? {}) as {
      sessionId: string;
      column: string;
      newColumnName: string;
      method: string;
      fillValue?: any;
    };

    if (!sessionId || !column || !method) {
      return reply.code(400).send({
        success: false,
        error: '缺少必要参数: sessionId、column 或 method'
      });
    }

    logger.info(`[QuickAction] 执行简单填补: session=${sessionId}, method=${method}, column=${column}`);

    // Load the session's full dataset and hand it to the service for filling.
    const fullData = await sessionService.getFullData(sessionId);
    const result = await quickActionService.executeFillnaSimple(fullData, {
      column,
      newColumnName,
      // The service's accepted method union is declared elsewhere; it validates the value.
      method: method as any,
      fillValue
    });

    if (!result.success) {
      return reply.code(500).send({
        success: false,
        error: result.error
      });
    }

    // Never persist a missing result: defaulting to [] here would overwrite
    // the session's dataset with nothing.
    const resultData = result.result_data;
    if (!Array.isArray(resultData)) {
      logger.error('[QuickAction] 简单填补失败: 服务未返回结果数据');
      return reply.code(500).send({
        success: false,
        error: '填补结果缺少数据，Session数据未更新'
      });
    }

    await sessionService.saveProcessedData(sessionId, resultData);

    return reply.code(200).send({
      success: true,
      data: {
        // Only the first 50 rows are sent back as a preview.
        newDataPreview: resultData.slice(0, 50),
        // Reports the total number of rows in the result set.
        affectedRows: resultData.length,
        message: result.message || '填补成功',
        stats: result.stats
      }
    });
  } catch (error: unknown) {
    // Thrown values are not guaranteed to be Error instances; narrow before reading `.message`.
    const message = error instanceof Error ? error.message : String(error);
    logger.error(`[QuickAction] 简单填补失败: ${message}`);
    return reply.code(500).send({
      success: false,
      error: message
    });
  }
}
/**
 * POST /api/v1/dc/tool-c/fillna/mice
 *
 * Runs MICE imputation over the requested columns through
 * `quickActionService.executeFillnaMice` and, on success, stores the
 * returned rows as the session's processed data.
 *
 * Responses:
 * - 400 when `sessionId` is missing or `columns` is not a non-empty array.
 * - 500 when the service reports failure, returns no row array, or any step throws.
 * - 200 with `data: { newDataPreview, affectedRows, message, stats }` otherwise.
 *
 * @param request - Fastify request whose body carries `sessionId`, `columns`
 *   and the optional `nIterations` / `randomState` settings.
 * @param reply - Fastify reply used to send the JSON response.
 */
async handleFillnaMice(request: FastifyRequest, reply: FastifyReply) {
  try {
    // The body may be absent entirely; fall back to an empty object so the
    // destructuring cannot throw and the checks below produce a clean 400.
    const { sessionId, columns, nIterations, randomState } = (request.body ?? {}) as {
      sessionId: string;
      columns: string[];
      nIterations?: number;
      randomState?: number;
    };

    // Validate before logging: the log line reads `columns.length`, which
    // would throw on a missing `columns` and surface as an opaque 500.
    if (!sessionId || !Array.isArray(columns) || columns.length === 0) {
      return reply.code(400).send({
        success: false,
        error: '缺少必要参数: sessionId 或 columns'
      });
    }

    logger.info(`[QuickAction] 执行MICE填补: session=${sessionId}, columns=${columns.length}`);

    // Load the session's full dataset and hand it to the service for imputation.
    const fullData = await sessionService.getFullData(sessionId);
    const result = await quickActionService.executeFillnaMice(fullData, {
      columns,
      nIterations,
      randomState
    });

    if (!result.success) {
      return reply.code(500).send({
        success: false,
        error: result.error
      });
    }

    // Never persist a missing result: defaulting to [] here would overwrite
    // the session's dataset with nothing.
    const resultData = result.result_data;
    if (!Array.isArray(resultData)) {
      logger.error('[QuickAction] MICE填补失败: 服务未返回结果数据');
      return reply.code(500).send({
        success: false,
        error: 'MICE填补结果缺少数据，Session数据未更新'
      });
    }

    await sessionService.saveProcessedData(sessionId, resultData);

    return reply.code(200).send({
      success: true,
      data: {
        // Only the first 50 rows are sent back as a preview.
        newDataPreview: resultData.slice(0, 50),
        // Reports the total number of rows in the result set.
        affectedRows: resultData.length,
        message: result.message || 'MICE填补成功',
        stats: result.stats
      }
    });
  } catch (error: unknown) {
    // Thrown values are not guaranteed to be Error instances; narrow before reading `.message`.
    const message = error instanceof Error ? error.message : String(error);
    logger.error(`[QuickAction] MICE填补失败: ${message}`);
    return reply.code(500).send({
      success: false,
      error: message
    });
  }
}
}
// ==================== Export singleton ====================

View File

@@ -228,3 +228,5 @@ export const streamAIController = new StreamAIController();