Files
AIclinicalresearch/frontend-v2/src/modules/dc/pages/tool-c/components/BinningDialog_improved.tsx
HaHafeng 74cf346453 feat(dc/tool-c): Add missing value imputation feature with 6 methods and MICE
Major features:
1. Missing value imputation (6 simple methods + MICE):
   - Mean/Median/Mode/Constant imputation
   - Forward fill (ffill) and Backward fill (bfill) for time series
   - MICE multivariate imputation (in progress, shape issue to fix)

2. Auto precision detection:
   - Automatically match decimal places of original data
   - Prevent false precision (e.g. 13.57 instead of 13.566716417910449)

3. Categorical variable detection:
   - Auto-detect and skip categorical columns in MICE
   - Show warnings for unsuitable columns
   - Suggest mode imputation for categorical data

4. UI improvements:
   - Rename button: "Delete Missing" to "Missing Value Handling"
   - Remove standalone "Dedup" and "MICE" buttons
   - 3-tab dialog: Delete / Fill / Advanced Fill
   - Display column statistics and recommended methods
   - Extended warning messages (8 seconds for skipped columns)

5. Bug fixes:
   - Fix sessionService.updateSessionData -> saveProcessedData
   - Fix OperationResult interface (add message and stats)
   - Fix Toolbar button labels and removal

Modified files:
Python: operations/fillna.py (new, 556 lines), main.py (3 new endpoints)
Backend: QuickActionService.ts, QuickActionController.ts, routes/index.ts
Frontend: MissingValueDialog.tsx (new, 437 lines), Toolbar.tsx, index.tsx
Tests: test_fillna_operations.py (774 lines), test scripts and docs
Docs: 5 documentation files updated

Known issues:
- MICE imputation has DataFrame shape mismatch issue (under debugging)
- Workaround: Use 6 simple imputation methods first

Status: Development complete, MICE debugging in progress
Lines added: ~2000 lines across 3 tiers
2025-12-10 13:06:00 +08:00

342 lines
12 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* 生成分类变量(分箱)对话框 - 改进版
*
* 改进:
* 1. 显示所有列(不过滤)
* 2. 自定义切点UI更友好
* 3. 提供示例说明
*/
import React, { useState } from 'react';
import { Modal, Select, Input, Button, Radio, Space, Tag, App, Alert } from 'antd';
import { Plus, X, Info } from 'lucide-react';
interface BinningDialogProps {
visible: boolean;
columns: Array<{ id: string; name: string; type?: string }>;
sessionId: string | null;
onClose: () => void;
onApply: (newData: any[]) => void;
}
const BinningDialog: React.FC<BinningDialogProps> = ({
visible,
columns,
sessionId,
onClose,
onApply,
}) => {
const { message } = App.useApp();
const [selectedColumn, setSelectedColumn] = useState<string>('');
const [method, setMethod] = useState<'custom' | 'equal_width' | 'equal_freq'>('equal_width');
const [newColumnName, setNewColumnName] = useState('');
// 自定义切点(改进:只存储切点值,标签自动生成)
const [customBins, setCustomBins] = useState<string>('18, 60');
const [customLabels, setCustomLabels] = useState<string>('青少年, 成年, 老年');
// 等宽/等频
const [numBins, setNumBins] = useState<number>(3);
const [autoLabels, setAutoLabels] = useState<string[]>(['低', '中', '高']);
const [loading, setLoading] = useState(false);
// 更新列选择
const handleColumnChange = (value: string) => {
setSelectedColumn(value);
const column = columns.find((c) => c.id === value);
if (column) {
setNewColumnName(`${column.name}_分组`);
}
};
// 执行分箱
const handleApply = async () => {
if (!sessionId || !selectedColumn) {
message.error('请选择列');
return;
}
if (!newColumnName) {
message.warning('请输入新列名');
return;
}
let params: any = {
column: selectedColumn,
method,
newColumnName,
};
if (method === 'custom') {
// 解析切点
const binsArray = customBins.split(',').map(b => parseFloat(b.trim())).filter(b => !isNaN(b));
if (binsArray.length < 2) {
message.warning('至少需要2个切点用逗号分隔18, 60');
return;
}
// 检查是否升序
const sorted = [...binsArray].sort((a, b) => a - b);
if (JSON.stringify(binsArray) !== JSON.stringify(sorted)) {
message.warning('切点必须按从小到大排列');
return;
}
// 解析标签
const labelsArray = customLabels.split(',').map(l => l.trim()).filter(l => l);
if (labelsArray.length > 0 && labelsArray.length !== binsArray.length - 1) {
message.warning(`需要${binsArray.length - 1}个标签(切点数-1或留空自动生成`);
return;
}
params.bins = binsArray;
params.labels = labelsArray.length > 0 ? labelsArray : undefined;
} else {
// 等宽/等频
params.numBins = numBins;
// 解析标签
const labelsArray = autoLabels.filter(l => l);
if (labelsArray.length > 0 && labelsArray.length !== numBins) {
message.warning(`需要${numBins}个标签,或留空自动生成`);
return;
}
if (labelsArray.length > 0) {
params.labels = labelsArray;
}
}
setLoading(true);
try {
const response = await fetch('/api/v1/dc/tool-c/quick-action', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
sessionId,
action: 'binning',
params,
}),
});
const result = await response.json();
if (result.success) {
message.success('分箱成功!');
onApply(result.data.newDataPreview);
onClose();
} else {
message.error({
content: result.error || '分箱失败',
duration: 5,
});
}
} catch (error: any) {
console.error('[BinningDialog] 执行失败:', error);
message.error({
content: '网络错误,请检查服务是否正常运行',
duration: 5,
});
} finally {
setLoading(false);
}
};
return (
<Modal
title="📊 生成分类变量(分箱)"
open={visible}
onCancel={onClose}
width={700}
footer={null}
>
<div className="space-y-4">
{/* 选择列 */}
<div>
<label className="block text-sm font-medium text-slate-700 mb-2">
</label>
<Select
placeholder="选择列"
value={selectedColumn || undefined}
onChange={handleColumnChange}
showSearch
style={{ width: '100%' }}
filterOption={(input, option) =>
(option?.label ?? '').toLowerCase().includes(input.toLowerCase())
}
options={columns.map((col) => ({
value: col.id,
label: col.name
}))}
/>
<div className="text-xs text-slate-500 mt-1">
💡
</div>
</div>
{/* 分箱方法 */}
{selectedColumn && (
<>
<div>
<label className="block text-sm font-medium text-slate-700 mb-2">
</label>
<Radio.Group value={method} onChange={(e) => setMethod(e.target.value)}>
<Space direction="vertical">
<Radio value="equal_width">
<span className="font-medium"></span>
<span className="text-xs text-slate-500 ml-2">
</span>
</Radio>
<Radio value="equal_freq">
<span className="font-medium"></span>
<span className="text-xs text-slate-500 ml-2">
</span>
</Radio>
<Radio value="custom">
<span className="font-medium"></span>
<span className="text-xs text-slate-500 ml-2">
</span>
</Radio>
</Space>
</Radio.Group>
</div>
{/* 等宽/等频配置 */}
{(method === 'equal_width' || method === 'equal_freq') && (
<div className="bg-slate-50 p-3 rounded-lg border border-slate-200">
<div className="mb-3">
<label className="text-sm font-medium text-slate-700 mb-2 block">
</label>
<Select
value={numBins}
onChange={(value) => {
setNumBins(value);
if (value === 3) {
setAutoLabels(['低', '中', '高']);
} else if (value === 4) {
setAutoLabels(['低', '中低', '中高', '高']);
} else if (value === 5) {
setAutoLabels(['极低', '低', '中', '高', '极高']);
} else {
setAutoLabels(Array.from({ length: value }, (_, i) => `${i + 1}`));
}
}}
style={{ width: '100%' }}
options={[
{ value: 2, label: '2组二分类' },
{ value: 3, label: '3组低、中、高' },
{ value: 4, label: '4组四分位' },
{ value: 5, label: '5组五分类' },
]}
/>
</div>
<div>
<label className="text-sm font-medium text-slate-700 mb-2 block">
使
</label>
<div className="flex flex-wrap gap-2">
{autoLabels.map((label, index) => (
<Tag key={index} color="blue">
{label}
</Tag>
))}
</div>
</div>
</div>
)}
{/* 自定义切点配置(改进版) */}
{method === 'custom' && (
<div className="bg-blue-50 p-4 rounded-lg border border-blue-200">
<Alert
message="如何使用自定义切点"
description={
<div className="text-xs space-y-1 mt-2">
<div> <strong></strong> <code className="bg-white px-1">18, 60</code></div>
<div> <strong></strong>3&lt;1818-60&gt;60</div>
<div> <strong></strong> <code className="bg-white px-1">, , </code></div>
<div> <strong></strong>-1 = 23</div>
</div>
}
type="info"
showIcon
icon={<Info size={16} />}
className="mb-3"
/>
<div className="space-y-3">
<div>
<label className="text-sm font-medium text-slate-700 mb-1 block">
</label>
<Input
placeholder="如18, 60"
value={customBins}
onChange={(e) => setCustomBins(e.target.value)}
/>
</div>
<div>
<label className="text-sm font-medium text-slate-700 mb-1 block">
</label>
<Input
placeholder="如:青少年, 成年, 老年"
value={customLabels}
onChange={(e) => setCustomLabels(e.target.value)}
/>
<div className="text-xs text-slate-500 mt-1">
使[18.0, 60.0)
</div>
</div>
</div>
</div>
)}
{/* 新列名 */}
<div>
<label className="block text-sm font-medium text-slate-700 mb-2">
</label>
<Input
placeholder="输入新列名"
value={newColumnName}
onChange={(e) => setNewColumnName(e.target.value)}
/>
</div>
</>
)}
{/* 操作按钮 */}
<div className="flex items-center justify-end gap-2 pt-4 border-t border-slate-200">
<Button onClick={onClose}></Button>
<Button
type="primary"
onClick={handleApply}
loading={loading}
disabled={!selectedColumn || !newColumnName}
>
</Button>
</div>
</div>
</Modal>
);
};
export default BinningDialog;