feat(dc/tool-c): Add pivot column ordering and NA handling features

Major features:
1. Pivot transformation enhancements:
   - Add option to keep unselected columns, with 3 aggregation methods (first/mode/mean)
   - Maintain original column order after pivot (aligned with source file)
   - Preserve pivot value order (order of first appearance)

2. NA handling across 4 core functions:
   - Recode: Support keep/map/drop for NA values
   - Filter: Already supports is_null/not_null operators
   - Binning: Support keep/label/assign for NA values (fixes NaN display)
   - Conditional: Add is_null/not_null operators

3. UI improvements:
   - Enable column header tooltips with custom header component
   - Add closeable alert for 50-row preview
   - Fix page scrollbar issues

Modified files:
Python: pivot.py, recode.py, binning.py, conditional.py, main.py
Backend: SessionController, QuickActionController, QuickActionService
Frontend: PivotDialog, RecodeDialog, BinningDialog, ConditionalDialog, DataGrid, index

Status: Ready for testing
This commit is contained in:
2025-12-09 14:40:14 +08:00
parent 75ceeb0653
commit f4f1d09837
19 changed files with 2314 additions and 123 deletions

View File

@@ -97,6 +97,8 @@ class RecodeRequest(BaseModel):
mapping: Dict[Any, Any]
create_new_column: bool = True
new_column_name: str = None
na_handling: str = 'keep' # ✨ 新增NA处理方式keep/map/drop
na_value: Any = None # ✨ 新增NA映射值
class BinningRequest(BaseModel):
"""分箱请求模型"""
@@ -107,6 +109,9 @@ class BinningRequest(BaseModel):
bins: List[Any] = None
labels: List[Any] = None
num_bins: int = 3
na_handling: str = 'keep' # ✨ 新增NA处理方式keep/label/assign
na_label: str = None # ✨ 新增NA标签
na_assign_to: int = None # ✨ 新增NA分配到的组索引
class ConditionalRequest(BaseModel):
"""条件生成列请求模型"""
@@ -127,6 +132,7 @@ class ComputeRequest(BaseModel):
data: List[Dict[str, Any]]
new_column_name: str
formula: str
column_mapping: List[Dict[str, str]] = [] # ✨ 新增:列名映射
class PivotRequest(BaseModel):
"""Pivot请求模型"""
@@ -135,6 +141,11 @@ class PivotRequest(BaseModel):
pivot_column: str
value_columns: List[str]
aggfunc: str = 'first'
column_mapping: List[Dict[str, str]] = [] # ✨ 列名映射
keep_unused_columns: bool = False # ✨ 是否保留未选择的列
unused_agg_method: str = 'first' # ✨ 未选择列的聚合方式first/mode/mean
original_column_order: List[str] = [] # ✨ 新增:原始列顺序
pivot_value_order: List[str] = [] # ✨ 新增:透视列值的原始顺序
# ==================== API路由 ====================
@@ -763,13 +774,15 @@ async def operation_recode(request: RecodeRequest):
# 转换为DataFrame
df = pd.DataFrame(request.data)
# 调用预写函数
# 调用预写函数传递NA处理参数
result_df = apply_recode(
df,
request.column,
request.mapping,
request.create_new_column,
request.new_column_name
request.new_column_name,
request.na_handling, # ✨ NA处理方式
request.na_value # ✨ NA映射值
)
# 转换回JSON处理NaN和inf值
@@ -840,7 +853,7 @@ async def operation_binning(request: BinningRequest):
# 转换为DataFrame
df = pd.DataFrame(request.data)
# 调用预写函数
# 调用预写函数传递NA处理参数
result_df = apply_binning(
df,
request.column,
@@ -848,7 +861,10 @@ async def operation_binning(request: BinningRequest):
request.new_column_name,
request.bins,
request.labels,
request.num_bins
request.num_bins,
request.na_handling, # ✨ NA处理方式
request.na_label, # ✨ NA标签
request.na_assign_to # ✨ NA分配到的组索引
)
# 转换回JSON处理Categorical类型、NaN值和inf值
@@ -1106,11 +1122,12 @@ async def operation_compute(request: ComputeRequest):
# 转换为DataFrame
df = pd.DataFrame(request.data)
# 调用预写函数
# 调用预写函数传递column_mapping
result_df = compute_column(
df,
request.new_column_name,
request.formula
request.formula,
request.column_mapping # ✨ 传递列名映射
)
# 转换回JSON处理NaN值和inf值
@@ -1201,13 +1218,18 @@ async def operation_pivot(request: PivotRequest):
# 转换为DataFrame
df = pd.DataFrame(request.data)
# 调用预写函数
# 调用预写函数传递column_mapping和未选择列处理参数
result_df = pivot_long_to_wide(
df,
request.index_column,
request.pivot_column,
request.value_columns,
request.aggfunc
request.aggfunc,
request.column_mapping, # ✨ 传递列名映射
request.keep_unused_columns, # ✨ 是否保留未选择的列
request.unused_agg_method, # ✨ 未选择列的聚合方式
request.original_column_order, # ✨ 原始列顺序
request.pivot_value_order # ✨ 透视列值的原始顺序
)
# 转换回JSON处理NaN和inf值