feat(dc/tool-c): Add missing value imputation feature with 6 methods and MICE

Major features:
1. Missing value imputation (6 simple methods + MICE):
   - Mean/Median/Mode/Constant imputation
   - Forward fill (ffill) and Backward fill (bfill) for time series
   - MICE multivariate imputation (in progress, shape issue to fix)

2. Auto precision detection:
   - Automatically match decimal places of original data
   - Prevent false precision (e.g. 13.57 instead of 13.566716417910449)

3. Categorical variable detection:
   - Auto-detect and skip categorical columns in MICE
   - Show warnings for unsuitable columns
   - Suggest mode imputation for categorical data

4. UI improvements:
   - Rename button: "Delete Missing" to "Missing Value Handling"
   - Remove standalone "Dedup" and "MICE" buttons
   - 3-tab dialog: Delete / Fill / Advanced Fill
   - Display column statistics and recommended methods
   - Extended warning messages (8 seconds for skipped columns)

5. Bug fixes:
   - Fix sessionService.updateSessionData -> saveProcessedData
   - Fix OperationResult interface (add message and stats)
   - Fix Toolbar button labels and remove obsolete buttons

Modified files:
Python: operations/fillna.py (new, 556 lines), main.py (3 new endpoints)
Backend: QuickActionService.ts, QuickActionController.ts, routes/index.ts
Frontend: MissingValueDialog.tsx (new, 437 lines), Toolbar.tsx, index.tsx
Tests: test_fillna_operations.py (774 lines), test scripts and docs
Docs: 5 documentation files updated

Known issues:
- MICE imputation has DataFrame shape mismatch issue (under debugging)
- Workaround: Use 6 simple imputation methods first

Status: Development complete, MICE debugging in progress
Lines added: ~2000 across 3 tiers (Python, backend, frontend)
This commit is contained in:
2025-12-10 13:06:00 +08:00
parent f4f1d09837
commit 74cf346453
102 changed files with 3806 additions and 181 deletions

View File

@@ -77,6 +77,19 @@ interface PivotParams {
unusedAggMethod?: 'first' | 'mode' | 'mean'; // ✨ 新增:未选择列的聚合方式
}
/**
 * Parameters for a simple (single-column) missing-value fill request.
 * `executeFillnaSimple` forwards each field to the Python service's
 * `/api/operations/fillna-simple` endpoint.
 */
interface FillnaSimpleParams {
/** Column to fill; forwarded as `column`. */
column: string;
/** Forwarded as `new_column_name`. */
newColumnName: string;
/** Fill strategy; forwarded unchanged as `method`. */
method: 'mean' | 'median' | 'mode' | 'constant' | 'ffill' | 'bfill';
/**
 * Forwarded as `fill_value`.
 * NOTE(review): presumably only used when `method` is 'constant' — confirm against the Python endpoint.
 */
fillValue?: any;
}
/**
 * Parameters for a MICE (multivariate) fill request.
 * `executeFillnaMice` forwards each field to the Python service's
 * `/api/operations/fillna-mice` endpoint.
 */
interface FillnaMiceParams {
/** Columns to impute; forwarded as `columns`. */
columns: string[];
/** Forwarded as `n_iterations`; the service method sends 10 when this is omitted. */
nIterations?: number;
/** Forwarded as `random_state`; the service method sends 42 when this is omitted. */
randomState?: number;
}
interface OperationResult {
success: boolean;
result_data?: any[];
@@ -84,6 +97,8 @@ interface OperationResult {
execution_time?: number;
result_shape?: [number, number];
error?: string;
message?: string;
stats?: any;
}
// ==================== 服务类 ====================
@@ -342,6 +357,104 @@ export class QuickActionService {
};
}
}
/**
 * Fetch missing-value statistics for one column from the Python service.
 *
 * Posts the rows and the column name to `/api/operations/fillna-stats`
 * with a 10-second timeout and returns the response body as-is.
 *
 * @param data   Rows to analyse; sent unchanged in the request body.
 * @param column Name of the column whose statistics are requested.
 * @returns The Python service's response body on success. On failure,
 *          the error response body if the service sent one, otherwise
 *          `{ success: false, error }` built from the caught error.
 */
async getFillnaStats(data: any[], column: string): Promise<OperationResult> {
  try {
    logger.info(`[QuickActionService] 获取列 '${column}' 的缺失值统计`);

    const requestBody = { data, column };
    const reply = await axios.post(
      `${PYTHON_SERVICE_URL}/api/operations/fillna-stats`,
      requestBody,
      { timeout: 10000 },
    );

    logger.info(`[QuickActionService] 获取统计成功`);
    return reply.data;
  } catch (failure: any) {
    logger.error(`[QuickActionService] 获取统计失败: ${failure.message}`);

    // Prefer the structured error body returned by the Python service, if any.
    const upstreamBody = failure.response?.data;
    return upstreamBody
      ? upstreamBody
      : { success: false, error: failure.message || '获取统计失败' };
  }
}
/**
 * Run a simple single-column fill through the Python service.
 *
 * Posts to `/api/operations/fillna-simple` with a 60-second timeout,
 * mapping the camelCase params onto the snake_case field names of the
 * request body (`new_column_name`, `fill_value`).
 *
 * @param data   Rows to process; sent unchanged in the request body.
 * @param params Column, output column name, fill method and optional fill value.
 * @returns The Python service's response body on success. On failure,
 *          the error response body if the service sent one, otherwise
 *          `{ success: false, error }` built from the caught error.
 */
async executeFillnaSimple(data: any[], params: FillnaSimpleParams): Promise<OperationResult> {
  try {
    logger.info(`[QuickActionService] 调用简单填补API: ${params.method} on '${params.column}'`);

    const requestBody = {
      data,
      column: params.column,
      new_column_name: params.newColumnName,
      method: params.method,
      fill_value: params.fillValue,
    };
    const reply = await axios.post(
      `${PYTHON_SERVICE_URL}/api/operations/fillna-simple`,
      requestBody,
      { timeout: 60000 },
    );

    logger.info(`[QuickActionService] 简单填补成功`);
    return reply.data;
  } catch (failure: any) {
    logger.error(`[QuickActionService] 简单填补失败: ${failure.message}`);

    // Prefer the structured error body returned by the Python service, if any.
    const upstreamBody = failure.response?.data;
    return upstreamBody
      ? upstreamBody
      : { success: false, error: failure.message || '简单填补失败' };
  }
}
/**
 * Run MICE multiple imputation through the Python service.
 *
 * Posts to `/api/operations/fillna-mice` with a 5-minute timeout.
 *
 * @param data   Rows to process; sent unchanged in the request body.
 * @param params Columns to impute, plus optional iteration count and random seed.
 *               `nIterations` falls back to 10 when omitted or 0.
 *               `randomState` falls back to 42 only when omitted (null/undefined),
 *               so an explicit seed of 0 is sent as given.
 * @returns The Python service's response body on success. On failure,
 *          the error response body if the service sent one, otherwise
 *          `{ success: false, error }` built from the caught error.
 */
async executeFillnaMice(data: any[], params: FillnaMiceParams): Promise<OperationResult> {
  try {
    logger.info(`[QuickActionService] 调用MICE填补API: ${params.columns.length}`);
    const response = await axios.post(`${PYTHON_SERVICE_URL}/api/operations/fillna-mice`, {
      data,
      columns: params.columns,
      // `||` is kept on purpose: an iteration count of 0 still falls back to 10.
      n_iterations: params.nIterations || 10,
      // `??` rather than `||`: with `||`, a caller-supplied seed of 0 was
      // silently replaced by 42.
      random_state: params.randomState ?? 42,
    }, {
      timeout: 300000, // MICE can take a long time, so allow 5 minutes
    });
    logger.info(`[QuickActionService] MICE填补成功`);
    return response.data;
  } catch (error: any) {
    logger.error(`[QuickActionService] MICE填补失败: ${error.message}`);
    // Prefer the structured error body returned by the Python service, if any.
    if (error.response?.data) {
      return error.response.data;
    }
    return {
      success: false,
      error: error.message || 'MICE填补失败',
    };
  }
}
}
// ==================== 导出单例 ====================