feat(dc/tool-c): Add pivot column ordering and NA handling features
Major features:

1. Pivot transformation enhancements:
   - Add an option to keep unselected columns, with 3 aggregation methods
   - Maintain the original column order after pivot (aligned with the source file)
   - Preserve pivot value order (order of first appearance)

2. NA handling across 4 core functions:
   - Recode: support keep/map/drop for NA values
   - Filter: already supports the is_null/not_null operators
   - Binning: support keep/label/assign for NA values (fixes the "nan" display)
   - Conditional: add the is_null/not_null operators

3. UI improvements:
   - Enable column header tooltips with a custom header component
   - Add a closeable alert for the 50-row preview
   - Fix page scrollbar issues

Modified files:
- Python: pivot.py, recode.py, binning.py, conditional.py, main.py
- Backend: SessionController, QuickActionController, QuickActionService
- Frontend: PivotDialog, RecodeDialog, BinningDialog, ConditionalDialog, DataGrid, index

Status: Ready for testing
This commit is contained in:
@@ -97,6 +97,8 @@ class RecodeRequest(BaseModel):
|
||||
mapping: Dict[Any, Any]
|
||||
create_new_column: bool = True
|
||||
new_column_name: str = None
|
||||
na_handling: str = 'keep' # ✨ 新增:NA处理方式(keep/map/drop)
|
||||
na_value: Any = None # ✨ 新增:NA映射值
|
||||
|
||||
class BinningRequest(BaseModel):
|
||||
"""分箱请求模型"""
|
||||
@@ -107,6 +109,9 @@ class BinningRequest(BaseModel):
|
||||
bins: List[Any] = None
|
||||
labels: List[Any] = None
|
||||
num_bins: int = 3
|
||||
na_handling: str = 'keep' # ✨ 新增:NA处理方式(keep/label/assign)
|
||||
na_label: str = None # ✨ 新增:NA标签
|
||||
na_assign_to: int = None # ✨ 新增:NA分配到的组索引
|
||||
|
||||
class ConditionalRequest(BaseModel):
|
||||
"""条件生成列请求模型"""
|
||||
@@ -127,6 +132,7 @@ class ComputeRequest(BaseModel):
|
||||
data: List[Dict[str, Any]]
|
||||
new_column_name: str
|
||||
formula: str
|
||||
column_mapping: List[Dict[str, str]] = [] # ✨ 新增:列名映射
|
||||
|
||||
class PivotRequest(BaseModel):
|
||||
"""Pivot请求模型"""
|
||||
@@ -135,6 +141,11 @@ class PivotRequest(BaseModel):
|
||||
pivot_column: str
|
||||
value_columns: List[str]
|
||||
aggfunc: str = 'first'
|
||||
column_mapping: List[Dict[str, str]] = [] # ✨ 列名映射
|
||||
keep_unused_columns: bool = False # ✨ 是否保留未选择的列
|
||||
unused_agg_method: str = 'first' # ✨ 未选择列的聚合方式(first/mode/mean)
|
||||
original_column_order: List[str] = [] # ✨ 新增:原始列顺序
|
||||
pivot_value_order: List[str] = [] # ✨ 新增:透视列值的原始顺序
|
||||
|
||||
|
||||
# ==================== API路由 ====================
|
||||
@@ -763,13 +774,15 @@ async def operation_recode(request: RecodeRequest):
|
||||
# 转换为DataFrame
|
||||
df = pd.DataFrame(request.data)
|
||||
|
||||
# 调用预写函数
|
||||
# 调用预写函数(传递NA处理参数)
|
||||
result_df = apply_recode(
|
||||
df,
|
||||
request.column,
|
||||
request.mapping,
|
||||
request.create_new_column,
|
||||
request.new_column_name
|
||||
request.new_column_name,
|
||||
request.na_handling, # ✨ NA处理方式
|
||||
request.na_value # ✨ NA映射值
|
||||
)
|
||||
|
||||
# 转换回JSON(处理NaN和inf值)
|
||||
@@ -840,7 +853,7 @@ async def operation_binning(request: BinningRequest):
|
||||
# 转换为DataFrame
|
||||
df = pd.DataFrame(request.data)
|
||||
|
||||
# 调用预写函数
|
||||
# 调用预写函数(传递NA处理参数)
|
||||
result_df = apply_binning(
|
||||
df,
|
||||
request.column,
|
||||
@@ -848,7 +861,10 @@ async def operation_binning(request: BinningRequest):
|
||||
request.new_column_name,
|
||||
request.bins,
|
||||
request.labels,
|
||||
request.num_bins
|
||||
request.num_bins,
|
||||
request.na_handling, # ✨ NA处理方式
|
||||
request.na_label, # ✨ NA标签
|
||||
request.na_assign_to # ✨ NA分配到的组索引
|
||||
)
|
||||
|
||||
# 转换回JSON(处理Categorical类型、NaN值和inf值)
|
||||
@@ -1106,11 +1122,12 @@ async def operation_compute(request: ComputeRequest):
|
||||
# 转换为DataFrame
|
||||
df = pd.DataFrame(request.data)
|
||||
|
||||
# 调用预写函数
|
||||
# ✨ 调用预写函数(传递column_mapping)
|
||||
result_df = compute_column(
|
||||
df,
|
||||
request.new_column_name,
|
||||
request.formula
|
||||
request.formula,
|
||||
request.column_mapping # ✨ 传递列名映射
|
||||
)
|
||||
|
||||
# 转换回JSON(处理NaN值和inf值)
|
||||
@@ -1201,13 +1218,18 @@ async def operation_pivot(request: PivotRequest):
|
||||
# 转换为DataFrame
|
||||
df = pd.DataFrame(request.data)
|
||||
|
||||
# 调用预写函数
|
||||
# ✨ 调用预写函数(传递column_mapping和未选择列处理参数)
|
||||
result_df = pivot_long_to_wide(
|
||||
df,
|
||||
request.index_column,
|
||||
request.pivot_column,
|
||||
request.value_columns,
|
||||
request.aggfunc
|
||||
request.aggfunc,
|
||||
request.column_mapping, # ✨ 传递列名映射
|
||||
request.keep_unused_columns, # ✨ 是否保留未选择的列
|
||||
request.unused_agg_method, # ✨ 未选择列的聚合方式
|
||||
request.original_column_order, # ✨ 原始列顺序
|
||||
request.pivot_value_order # ✨ 透视列值的原始顺序
|
||||
)
|
||||
|
||||
# 转换回JSON(处理NaN和inf值)
|
||||
|
||||
Reference in New Issue
Block a user