feat(dc/tool-c): Add pivot column ordering and NA handling features

Major features:
1. Pivot transformation enhancements:
   - Add option to keep unselected columns, aggregated by one of 3 methods (first/mode/mean)
   - Maintain original column order after pivot (aligned with source file)
   - Preserve pivot value order (order of first appearance in the data)

2. NA handling across 4 core functions:
   - Recode: Support keep/map/drop for NA values
   - Filter: Already supports is_null/not_null operators
   - Binning: Support keep/label/assign for NA values (fixes NA values being displayed as "nan")
   - Conditional: Add is_null/not_null operators

3. UI improvements:
   - Enable column header tooltips with custom header component
   - Add closeable alert for 50-row preview
   - Fix page scrollbar issues

Modified files:
Python: pivot.py, recode.py, binning.py, conditional.py, main.py
Backend: SessionController, QuickActionController, QuickActionService
Frontend: PivotDialog, RecodeDialog, BinningDialog, ConditionalDialog, DataGrid, index

Status: Ready for testing
This commit is contained in:
2025-12-09 14:40:14 +08:00
parent 75ceeb0653
commit f4f1d09837
19 changed files with 2314 additions and 123 deletions

View File

@@ -113,8 +113,9 @@ export class QuickActionController {
});
}
// 4. 获取完整数据
// 4. 获取完整数据和session信息包含columnMapping
let fullData: any[];
let session: any;
try {
fullData = await sessionService.getFullData(sessionId);
if (!fullData || fullData.length === 0) {
@@ -124,6 +125,9 @@ export class QuickActionController {
error: '数据为空,请重新上传文件'
});
}
// ✨ 获取session信息用于compute等需要columnMapping的操作
session = await sessionService.getSession(sessionId);
} catch (error: any) {
logger.error(`[QuickAction] 获取数据失败: ${error.message}`);
return reply.code(500).send({
@@ -153,10 +157,32 @@ export class QuickActionController {
executeResult = await quickActionService.executeDropna(fullData, params);
break;
case 'compute':
executeResult = await quickActionService.executeCompute(fullData, params);
// ✨ 传递columnMapping给compute操作
executeResult = await quickActionService.executeCompute(fullData, params, session.columnMapping);
break;
case 'pivot':
executeResult = await quickActionService.executePivot(fullData, params);
// ✨ 传递columnMapping、原始列顺序和透视值顺序给pivot操作
const originalColumnOrder = session.columns || [];
// ✨ 获取透视列值的原始顺序(按首次出现顺序)
const pivotColumn = params.pivotColumn;
const seenPivotValues = new Set();
const pivotValueOrder: string[] = [];
for (const row of fullData) {
const pivotValue = row[pivotColumn];
if (pivotValue !== null && pivotValue !== undefined && !seenPivotValues.has(pivotValue)) {
seenPivotValues.add(pivotValue);
pivotValueOrder.push(String(pivotValue));
}
}
executeResult = await quickActionService.executePivot(
fullData,
params,
session.columnMapping,
originalColumnOrder,
pivotValueOrder
);
break;
}

View File

@@ -392,7 +392,7 @@ export class SessionController {
// 1. 获取完整数据
const data = await sessionService.getFullData(id);
// 2. 提取唯一值(去除空值和首尾空格
// 2. 提取唯一值(保留NA值但清理字符串
const values = data.map((row) => row[column]);
const cleanedValues = values.map((val) => {
if (val === null || val === undefined || val === '') return null;
@@ -400,12 +400,22 @@ export class SessionController {
return typeof val === 'string' ? val.trim() : val;
});
// 3. 去重
const uniqueValues = Array.from(new Set(cleanedValues))
// 3. 去重(✨ 保留null值但用特殊标记表示
const uniqueSet = Array.from(new Set(cleanedValues));
const hasNA = uniqueSet.includes(null);
const naCount = cleanedValues.filter(v => v === null).length; // ✨ 统计NA数量
// 过滤掉null和空白然后排序
const nonNAValues = uniqueSet
.filter((v) => v !== null && v !== '' && v !== '(空白)')
.sort(); // 排序,方便查看
// ✨ 如果有NA添加到数组末尾用特殊字符串标记
const uniqueValues = hasNA
? [...nonNAValues, '<空值/NA>']
: nonNAValues;
logger.info(`[SessionController] 唯一值数量: ${uniqueValues.length}`);
logger.info(`[SessionController] 唯一值数量: ${uniqueValues.length} (含NA: ${hasNA}, NA数量: ${naCount})`);
// 4. 返回结果
return reply.send({
@@ -414,6 +424,8 @@ export class SessionController {
column,
uniqueValues,
count: uniqueValues.length,
naCount: hasNA ? naCount : 0, // ✨ 返回NA数量
totalCount: data.length, // ✨ 总行数
},
});
} catch (error: any) {

View File

@@ -27,6 +27,8 @@ interface RecodeParams {
mapping: Record<string, any>;
createNewColumn: boolean;
newColumnName?: string;
naHandling?: 'keep' | 'map' | 'drop'; // ✨ 新增NA处理方式
naValue?: any; // ✨ 新增NA映射值
}
interface BinningParams {
@@ -36,6 +38,9 @@ interface BinningParams {
bins?: number[];
labels?: (string | number)[];
numBins?: number;
naHandling?: 'keep' | 'label' | 'assign'; // ✨ 新增NA处理方式
naLabel?: string; // ✨ 新增NA标签
naAssignTo?: number; // ✨ 新增NA分配到的组索引
}
interface ConditionalParams {
@@ -68,6 +73,8 @@ interface PivotParams {
pivotColumn: string;
valueColumns: string[];
aggfunc: 'first' | 'last' | 'mean' | 'sum' | 'min' | 'max';
keepUnusedColumns?: boolean; // ✨ 新增:是否保留未选择的列
unusedAggMethod?: 'first' | 'mode' | 'mean'; // ✨ 新增:未选择列的聚合方式
}
interface OperationResult {
@@ -128,6 +135,8 @@ export class QuickActionService {
mapping: params.mapping,
create_new_column: params.createNewColumn,
new_column_name: params.newColumnName,
na_handling: params.naHandling || 'keep', // ✨ NA处理方式
na_value: params.naValue, // ✨ NA映射值
}, {
timeout: 60000,
});
@@ -164,6 +173,9 @@ export class QuickActionService {
bins: params.bins,
labels: params.labels,
num_bins: params.numBins,
na_handling: params.naHandling || 'keep', // ✨ NA处理方式
na_label: params.naLabel, // ✨ NA标签
na_assign_to: params.naAssignTo, // ✨ NA分配到的组索引
}, {
timeout: 60000,
});
@@ -254,14 +266,16 @@ export class QuickActionService {
/**
* 执行计算列
*/
async executeCompute(data: any[], params: ComputeParams): Promise<OperationResult> {
async executeCompute(data: any[], params: ComputeParams, columnMapping?: any[]): Promise<OperationResult> {
try {
logger.info(`[QuickActionService] 调用计算列API: ${params.newColumnName}, formula=${params.formula}`);
// ✨ 传递columnMapping给Python方案BPython负责替换
const response = await axios.post(`${PYTHON_SERVICE_URL}/api/operations/compute`, {
data,
new_column_name: params.newColumnName,
formula: params.formula,
column_mapping: columnMapping || [], // ✨ 传递列名映射
}, {
timeout: 60000,
});
@@ -286,16 +300,28 @@ export class QuickActionService {
/**
* 执行Pivot长表→宽表
*/
async executePivot(data: any[], params: PivotParams): Promise<OperationResult> {
async executePivot(
data: any[],
params: PivotParams,
columnMapping?: any[],
originalColumnOrder?: string[],
pivotValueOrder?: string[]
): Promise<OperationResult> {
try {
logger.info(`[QuickActionService] 调用Pivot API: ${params.indexColumn} × ${params.pivotColumn}`);
// ✨ 传递columnMapping、未选择列处理参数和列顺序给Python
const response = await axios.post(`${PYTHON_SERVICE_URL}/api/operations/pivot`, {
data,
index_column: params.indexColumn,
pivot_column: params.pivotColumn,
value_columns: params.valueColumns,
aggfunc: params.aggfunc,
column_mapping: columnMapping || [], // ✨ 传递列名映射
keep_unused_columns: params.keepUnusedColumns || false, // ✨ 是否保留未选择的列
unused_agg_method: params.unusedAggMethod || 'first', // ✨ 未选择列的聚合方式
original_column_order: originalColumnOrder || [], // ✨ 原始列顺序
pivot_value_order: pivotValueOrder || [], // ✨ 透视列值的原始顺序
}, {
timeout: 60000,
});