Major features:

1. Missing value imputation (6 simple methods + MICE):
   - Mean/Median/Mode/Constant imputation
   - Forward fill (ffill) and backward fill (bfill) for time series
   - MICE multivariate imputation (in progress, shape issue to fix)

2. Auto precision detection:
   - Automatically match the decimal places of the original data
   - Prevent false precision (e.g. 13.57 instead of 13.566716417910449)

3. Categorical variable detection:
   - Auto-detect and skip categorical columns in MICE
   - Show warnings for unsuitable columns
   - Suggest mode imputation for categorical data

4. UI improvements:
   - Rename button: "Delete Missing" to "Missing Value Handling"
   - Remove standalone "Dedup" and "MICE" buttons
   - 3-tab dialog: Delete / Fill / Advanced Fill
   - Display column statistics and recommended methods
   - Extended warning messages (8 seconds for skipped columns)

5. Bug fixes:
   - Fix sessionService.updateSessionData -> saveProcessedData
   - Fix OperationResult interface (add message and stats)
   - Fix Toolbar button labels and removal

Modified files:
   - Python: operations/fillna.py (new, 556 lines), main.py (3 new endpoints)
   - Backend: QuickActionService.ts, QuickActionController.ts, routes/index.ts
   - Frontend: MissingValueDialog.tsx (new, 437 lines), Toolbar.tsx, index.tsx
   - Tests: test_fillna_operations.py (774 lines), test scripts and docs
   - Docs: 5 documentation files updated

Known issues:
   - MICE imputation has a DataFrame shape-mismatch issue (under debugging)
   - Workaround: use the 6 simple imputation methods first

Status: development complete, MICE debugging in progress
Lines added: ~2000 lines across 3 tiers
342 lines
12 KiB
TypeScript
342 lines
12 KiB
TypeScript
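/**
 * Binning dialog (generate a categorical variable from a column) - improved version
 *
 * Improvements:
 * 1. Shows all columns (no filtering)
 * 2. Friendlier UI for custom cut points
 * 3. Provides usage examples
 */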
|
||
|
||
import React, { useState } from 'react';
|
||
import { Modal, Select, Input, Button, Radio, Space, Tag, App, Alert } from 'antd';
|
||
import { Plus, X, Info } from 'lucide-react';
|
||
|
||
/**
 * Props accepted by {@link BinningDialog}.
 */
interface BinningDialogProps {
  /** Controls whether the modal is open (forwarded to the Modal's `open` prop). */
  visible: boolean;
  /**
   * Columns offered in the column selector: `id` is used as the option value
   * and sent to the server, `name` is the displayed label. `type` is accepted
   * but not read by the dialog.
   */
  columns: Array<{ id: string; name: string; type?: string }>;
  /** Session identifier sent with the request; applying is refused while it is `null`. */
  sessionId: string | null;
  /** Invoked when the modal is cancelled and after a successful binning request. */
  onClose: () => void;
  /**
   * Invoked with the `newDataPreview` payload of a successful response.
   * Kept as `any[]` so existing callers with concrete row types still type-check.
   */
  onApply: (newData: any[]) => void;
}
|
||
|
||
/** Binning strategies that can be requested from the quick-action endpoint. */
type BinningMethod = 'custom' | 'equal_width' | 'equal_freq';

/** Shape of the `params` object sent with the `binning` quick action. */
interface BinningParams {
  column: string;
  method: BinningMethod;
  newColumnName: string;
  bins?: number[];
  labels?: string[];
  numBins?: number;
}

/**
 * Splits a comma-separated text field into trimmed, non-empty tokens.
 * Both ASCII (`,`) and full-width (`,`) commas are accepted as separators.
 */
const splitList = (raw: string): string[] =>
  raw
    .split(/[,,]/)
    .map((token) => token.trim())
    .filter((token) => token.length > 0);

/** Returns true when every value is strictly greater than the one before it. */
const isStrictlyAscending = (values: readonly number[]): boolean =>
  values.every((value, i) => i === 0 || values[i - 1] < value);

/** Preset group labels displayed for the equal-width / equal-frequency methods. */
const presetLabels = (count: number): string[] => {
  switch (count) {
    case 3:
      return ['低', '中', '高'];
    case 4:
      return ['低', '中低', '中高', '高'];
    case 5:
      return ['极低', '低', '中', '高', '极高'];
    default:
      return Array.from({ length: count }, (_, i) => `组${i + 1}`);
  }
};

/**
 * Modal dialog that turns a column into a categorical variable by binning.
 *
 * The user picks a column, a binning method (equal width, equal frequency or
 * custom cut points) and a name for the new column. On "apply" the dialog
 * POSTs a `binning` quick action to `/api/v1/dc/tool-c/quick-action`; on a
 * successful response it passes `data.newDataPreview` to `onApply` and closes.
 */
const BinningDialog: React.FC<BinningDialogProps> = ({
  visible,
  columns,
  sessionId,
  onClose,
  onApply,
}) => {
  const { message } = App.useApp();
  const [selectedColumn, setSelectedColumn] = useState<string>('');
  const [method, setMethod] = useState<BinningMethod>('equal_width');
  const [newColumnName, setNewColumnName] = useState('');

  // Custom cut points: raw text of the two inputs (parsed on apply).
  const [customBins, setCustomBins] = useState<string>('18, 60');
  const [customLabels, setCustomLabels] = useState<string>('青少年, 成年, 老年');

  // Equal-width / equal-frequency settings.
  const [numBins, setNumBins] = useState<number>(3);
  const [autoLabels, setAutoLabels] = useState<string[]>(presetLabels(3));

  const [loading, setLoading] = useState(false);

  // Selecting a column also proposes a default name for the new column.
  const handleColumnChange = (value: string) => {
    setSelectedColumn(value);
    const column = columns.find((c) => c.id === value);
    if (column) {
      setNewColumnName(`${column.name}_分组`);
    }
  };

  // Validates the form, sends the binning request and reports the outcome.
  const handleApply = async () => {
    if (!sessionId || !selectedColumn) {
      message.error('请选择列');
      return;
    }

    // A whitespace-only name is treated as missing.
    const trimmedName = newColumnName.trim();
    if (!trimmedName) {
      message.warning('请输入新列名');
      return;
    }

    const params: BinningParams = {
      column: selectedColumn,
      method,
      newColumnName: trimmedName,
    };

    if (method === 'custom') {
      // Parse cut points; reject non-numeric tokens instead of silently dropping them.
      const cutPoints = splitList(customBins).map((token) => Number(token));
      if (cutPoints.some((point) => !Number.isFinite(point))) {
        message.warning('切点必须是数字(用逗号分隔,如:18, 60)');
        return;
      }
      if (cutPoints.length < 2) {
        message.warning('至少需要2个切点(用逗号分隔,如:18, 60)');
        return;
      }

      // Cut points must be strictly increasing (duplicates are rejected too).
      if (!isStrictlyAscending(cutPoints)) {
        message.warning('切点必须按从小到大排列');
        return;
      }

      // N cut points split the range into N + 1 groups (e.g. 18, 60 -> <18, 18-60, >60),
      // matching the help text and the default labels of this dialog.
      // NOTE(review): assumes the server adds the open-ended outer intervals — confirm
      // against the binning operation on the backend.
      const labels = splitList(customLabels);
      const expectedLabels = cutPoints.length + 1;
      if (labels.length > 0 && labels.length !== expectedLabels) {
        message.warning(`需要${expectedLabels}个标签(切点数+1),或留空自动生成`);
        return;
      }

      params.bins = cutPoints;
      if (labels.length > 0) {
        params.labels = labels;
      }
    } else {
      // Equal width / equal frequency.
      params.numBins = numBins;

      const labels = autoLabels.filter((label) => label);
      if (labels.length > 0 && labels.length !== numBins) {
        message.warning(`需要${numBins}个标签,或留空自动生成`);
        return;
      }

      if (labels.length > 0) {
        params.labels = labels;
      }
    }

    setLoading(true);
    try {
      const response = await fetch('/api/v1/dc/tool-c/quick-action', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          sessionId,
          action: 'binning',
          params,
        }),
      });

      const result = await response.json();
      // Validate the payload before announcing success so a malformed response
      // is reported as a failure rather than "success" followed by an exception.
      const preview = result?.data?.newDataPreview;

      if (result?.success && Array.isArray(preview)) {
        message.success('分箱成功!');
        onApply(preview);
        onClose();
      } else {
        message.error({
          content: result?.error || '分箱失败',
          duration: 5,
        });
      }
    } catch (error: unknown) {
      console.error('[BinningDialog] 执行失败:', error);
      message.error({
        content: '网络错误,请检查服务是否正常运行',
        duration: 5,
      });
    } finally {
      setLoading(false);
    }
  };

  return (
    <Modal
      title="📊 生成分类变量(分箱)"
      open={visible}
      onCancel={onClose}
      width={700}
      footer={null}
    >
      <div className="space-y-4">
        {/* Column selection */}
        <div>
          <label className="block text-sm font-medium text-slate-700 mb-2">
            选择连续数值列:
          </label>
          <Select
            placeholder="选择列"
            value={selectedColumn || undefined}
            onChange={handleColumnChange}
            showSearch
            style={{ width: '100%' }}
            filterOption={(input, option) =>
              (option?.label ?? '').toLowerCase().includes(input.toLowerCase())
            }
            options={columns.map((col) => ({
              value: col.id,
              label: col.name,
            }))}
          />
          <div className="text-xs text-slate-500 mt-1">
            💡 提示:如果选择的列不是数值类型,系统会自动尝试转换
          </div>
        </div>

        {/* Binning method (only shown once a column is selected) */}
        {selectedColumn && (
          <>
            <div>
              <label className="block text-sm font-medium text-slate-700 mb-2">
                分箱方法:
              </label>
              <Radio.Group value={method} onChange={(e) => setMethod(e.target.value)}>
                <Space direction="vertical">
                  <Radio value="equal_width">
                    <span className="font-medium">等宽分箱(推荐)</span>
                    <span className="text-xs text-slate-500 ml-2">
                      将数值范围等分
                    </span>
                  </Radio>
                  <Radio value="equal_freq">
                    <span className="font-medium">等频分箱</span>
                    <span className="text-xs text-slate-500 ml-2">
                      每组样本量相等
                    </span>
                  </Radio>
                  <Radio value="custom">
                    <span className="font-medium">自定义切点</span>
                    <span className="text-xs text-slate-500 ml-2">
                      指定具体的分割点
                    </span>
                  </Radio>
                </Space>
              </Radio.Group>
            </div>

            {/* Equal-width / equal-frequency configuration */}
            {(method === 'equal_width' || method === 'equal_freq') && (
              <div className="bg-slate-50 p-3 rounded-lg border border-slate-200">
                <div className="mb-3">
                  <label className="text-sm font-medium text-slate-700 mb-2 block">
                    分组数量:
                  </label>
                  <Select
                    value={numBins}
                    onChange={(value) => {
                      // Keep the preset labels in step with the group count.
                      setNumBins(value);
                      setAutoLabels(presetLabels(value));
                    }}
                    style={{ width: '100%' }}
                    options={[
                      { value: 2, label: '2组(二分类)' },
                      { value: 3, label: '3组(低、中、高)' },
                      { value: 4, label: '4组(四分位)' },
                      { value: 5, label: '5组(五分类)' },
                    ]}
                  />
                </div>

                <div>
                  <label className="text-sm font-medium text-slate-700 mb-2 block">
                    标签(可选,留空则使用区间):
                  </label>
                  <div className="flex flex-wrap gap-2">
                    {autoLabels.map((label, index) => (
                      <Tag key={index} color="blue">
                        {label}
                      </Tag>
                    ))}
                  </div>
                </div>
              </div>
            )}

            {/* Custom cut point configuration (improved version) */}
            {method === 'custom' && (
              <div className="bg-blue-50 p-4 rounded-lg border border-blue-200">
                <Alert
                  message="如何使用自定义切点"
                  description={
                    <div className="text-xs space-y-1 mt-2">
                      <div>• <strong>切点</strong>:用逗号分隔的数字,如 <code className="bg-white px-1">18, 60</code></div>
                      {/* "<" and ">" must not appear raw in JSX text, hence the string expression. */}
                      <div>• <strong>结果</strong>:生成3组({'<18、18-60、>60'})</div>
                      <div>• <strong>标签</strong>:可选,用逗号分隔,如 <code className="bg-white px-1">青少年, 成年, 老年</code></div>
                      <div>• <strong>注意</strong>:切点数量+1 = 标签数量(如2个切点需要3个标签)</div>
                    </div>
                  }
                  type="info"
                  showIcon
                  icon={<Info size={16} />}
                  className="mb-3"
                />

                <div className="space-y-3">
                  <div>
                    <label className="text-sm font-medium text-slate-700 mb-1 block">
                      切点(用逗号分隔,必须从小到大):
                    </label>
                    <Input
                      placeholder="如:18, 60"
                      value={customBins}
                      onChange={(e) => setCustomBins(e.target.value)}
                    />
                  </div>

                  <div>
                    <label className="text-sm font-medium text-slate-700 mb-1 block">
                      标签(可选,用逗号分隔):
                    </label>
                    <Input
                      placeholder="如:青少年, 成年, 老年"
                      value={customLabels}
                      onChange={(e) => setCustomLabels(e.target.value)}
                    />
                    <div className="text-xs text-slate-500 mt-1">
                      留空则使用默认区间标签(如:[18.0, 60.0))
                    </div>
                  </div>
                </div>
              </div>
            )}

            {/* New column name */}
            <div>
              <label className="block text-sm font-medium text-slate-700 mb-2">
                新列名:
              </label>
              <Input
                placeholder="输入新列名"
                value={newColumnName}
                onChange={(e) => setNewColumnName(e.target.value)}
              />
            </div>
          </>
        )}

        {/* Action buttons */}
        <div className="flex items-center justify-end gap-2 pt-4 border-t border-slate-200">
          <Button onClick={onClose}>取消</Button>
          <Button
            type="primary"
            onClick={handleApply}
            loading={loading}
            disabled={!selectedColumn || !newColumnName.trim()}
          >
            执行分箱
          </Button>
        </div>
      </div>
    </Modal>
  );
};
|
||
|
||
export default BinningDialog;
|
||
|
||
|
||
|
||
|
||
|