feat(dc/tool-c): Add pivot column ordering and NA handling features

Major features: 1. Pivot transformation enhancements: - Add option to keep unselected columns with 3 aggregation methods - Maintain original column order after pivot (aligned with source file) - Preserve pivot value order (first appearance order) 2. NA handling across 4 core functions: - Recode: Support keep/map/drop for NA values - Filter: Already supports is_null/not_null operators - Binning: Support keep/label/assign for NA values (fix nan display) - Conditional: Add is_null/not_null operators 3. UI improvements: - Enable column header tooltips with custom header component - Add closeable alert for 50-row preview - Fix page scrollbar issues Modified files: Python: pivot.py, recode.py, binning.py, conditional.py, main.py Backend: SessionController, QuickActionController, QuickActionService Frontend: PivotDialog, RecodeDialog, BinningDialog, ConditionalDialog, DataGrid, index Status: Ready for testing
2025-12-09 14:40:14 +08:00
parent 75ceeb0653
commit f4f1d09837
19 changed files with 2314 additions and 123 deletions
--- a/backend/src/modules/dc/tool-c/controllers/QuickActionController.ts
+++ b/backend/src/modules/dc/tool-c/controllers/QuickActionController.ts
@@ -113,8 +113,9 @@ export class QuickActionController {
          });
      }
      
-      // 4. 获取完整数据
+      // 4. 获取完整数据和session信息（包含columnMapping）
      let fullData: any[];
+      let session: any;
      try {
        fullData = await sessionService.getFullData(sessionId);
        if (!fullData || fullData.length === 0) {
@@ -124,6 +125,9 @@ export class QuickActionController {
            error: '数据为空，请重新上传文件'
          });
        }
+        
+        // ✨ 获取session信息（用于compute等需要columnMapping的操作）
+        session = await sessionService.getSession(sessionId);
      } catch (error: any) {
        logger.error(`[QuickAction] 获取数据失败: ${error.message}`);
        return reply.code(500).send({
@@ -153,10 +157,32 @@ export class QuickActionController {
            executeResult = await quickActionService.executeDropna(fullData, params);
            break;
          case 'compute':
-            executeResult = await quickActionService.executeCompute(fullData, params);
+            // ✨ 传递columnMapping给compute操作
+            executeResult = await quickActionService.executeCompute(fullData, params, session.columnMapping);
            break;
          case 'pivot':
-            executeResult = await quickActionService.executePivot(fullData, params);
+            // ✨ 传递columnMapping、原始列顺序和透视值顺序给pivot操作
+            const originalColumnOrder = session.columns || [];
+            
+            // ✨ 获取透视列值的原始顺序（按首次出现顺序）
+            const pivotColumn = params.pivotColumn;
+            const seenPivotValues = new Set();
+            const pivotValueOrder: string[] = [];
+            for (const row of fullData) {
+              const pivotValue = row[pivotColumn];
+              if (pivotValue !== null && pivotValue !== undefined && !seenPivotValues.has(pivotValue)) {
+                seenPivotValues.add(pivotValue);
+                pivotValueOrder.push(String(pivotValue));
+              }
+            }
+            
+            executeResult = await quickActionService.executePivot(
+              fullData, 
+              params, 
+              session.columnMapping,
+              originalColumnOrder,
+              pivotValueOrder
+            );
            break;
        }
        
--- a/backend/src/modules/dc/tool-c/controllers/SessionController.ts
+++ b/backend/src/modules/dc/tool-c/controllers/SessionController.ts
@@ -392,7 +392,7 @@ export class SessionController {
      // 1. 获取完整数据
      const data = await sessionService.getFullData(id);

-      // 2. 提取唯一值（去除空值和首尾空格）
+      // 2. 提取唯一值（保留NA值，但清理字符串）
      const values = data.map((row) => row[column]);
      const cleanedValues = values.map((val) => {
        if (val === null || val === undefined || val === '') return null;
@@ -400,12 +400,22 @@ export class SessionController {
        return typeof val === 'string' ? val.trim() : val;
      });

-      // 3. 去重
-      const uniqueValues = Array.from(new Set(cleanedValues))
+      // 3. 去重（✨ 保留null值，但用特殊标记表示）
+      const uniqueSet = Array.from(new Set(cleanedValues));
+      const hasNA = uniqueSet.includes(null);
+      const naCount = cleanedValues.filter(v => v === null).length;  // ✨ 统计NA数量
+      
+      // 过滤掉null和空白，然后排序
+      const nonNAValues = uniqueSet
        .filter((v) => v !== null && v !== '' && v !== '(空白)')
        .sort(); // 排序，方便查看
      
-      logger.info(`[SessionController] 唯一值数量: ${uniqueValues.length}`);
+      // ✨ 如果有NA，添加到数组末尾（用特殊字符串标记）
+      const uniqueValues = hasNA 
+        ? [...nonNAValues, '<空值/NA>']
+        : nonNAValues;
+
+      logger.info(`[SessionController] 唯一值数量: ${uniqueValues.length} (含NA: ${hasNA}, NA数量: ${naCount})`);

      // 4. 返回结果
      return reply.send({
@@ -414,6 +424,8 @@ export class SessionController {
          column,
          uniqueValues,
          count: uniqueValues.length,
+          naCount: hasNA ? naCount : 0,  // ✨ 返回NA数量
+          totalCount: data.length,  // ✨ 总行数
        },
      });
    } catch (error: any) {
--- a/backend/src/modules/dc/tool-c/services/QuickActionService.ts
+++ b/backend/src/modules/dc/tool-c/services/QuickActionService.ts
@@ -27,6 +27,8 @@ interface RecodeParams {
  mapping: Record<string, any>;
  createNewColumn: boolean;
  newColumnName?: string;
+  naHandling?: 'keep' | 'map' | 'drop';  // ✨ 新增：NA处理方式
+  naValue?: any;  // ✨ 新增：NA映射值
 }

 interface BinningParams {
@@ -36,6 +38,9 @@ interface BinningParams {
  bins?: number[];
  labels?: (string | number)[];
  numBins?: number;
+  naHandling?: 'keep' | 'label' | 'assign';  // ✨ 新增：NA处理方式
+  naLabel?: string;  // ✨ 新增：NA标签
+  naAssignTo?: number;  // ✨ 新增：NA分配到的组索引
 }

 interface ConditionalParams {
@@ -68,6 +73,8 @@ interface PivotParams {
  pivotColumn: string;
  valueColumns: string[];
  aggfunc: 'first' | 'last' | 'mean' | 'sum' | 'min' | 'max';
+  keepUnusedColumns?: boolean;  // ✨ 新增：是否保留未选择的列
+  unusedAggMethod?: 'first' | 'mode' | 'mean';  // ✨ 新增：未选择列的聚合方式
 }

 interface OperationResult {
@@ -128,6 +135,8 @@ export class QuickActionService {
        mapping: params.mapping,
        create_new_column: params.createNewColumn,
        new_column_name: params.newColumnName,
+        na_handling: params.naHandling || 'keep',  // ✨ NA处理方式
+        na_value: params.naValue,  // ✨ NA映射值
      }, {
        timeout: 60000,
      });
@@ -164,6 +173,9 @@ export class QuickActionService {
        bins: params.bins,
        labels: params.labels,
        num_bins: params.numBins,
+        na_handling: params.naHandling || 'keep',  // ✨ NA处理方式
+        na_label: params.naLabel,  // ✨ NA标签
+        na_assign_to: params.naAssignTo,  // ✨ NA分配到的组索引
      }, {
        timeout: 60000,
      });
@@ -254,14 +266,16 @@ export class QuickActionService {
  /**
   * 执行计算列
   */
-  async executeCompute(data: any[], params: ComputeParams): Promise<OperationResult> {
+  async executeCompute(data: any[], params: ComputeParams, columnMapping?: any[]): Promise<OperationResult> {
    try {
      logger.info(`[QuickActionService] 调用计算列API: ${params.newColumnName}, formula=${params.formula}`);
      
+      // ✨ 传递columnMapping给Python（方案B：Python负责替换）
      const response = await axios.post(`${PYTHON_SERVICE_URL}/api/operations/compute`, {
        data,
        new_column_name: params.newColumnName,
        formula: params.formula,
+        column_mapping: columnMapping || [], // ✨ 传递列名映射
      }, {
        timeout: 60000,
      });
@@ -286,16 +300,28 @@ export class QuickActionService {
  /**
   * 执行Pivot（长表→宽表）
   */
-  async executePivot(data: any[], params: PivotParams): Promise<OperationResult> {
+  async executePivot(
+    data: any[], 
+    params: PivotParams, 
+    columnMapping?: any[], 
+    originalColumnOrder?: string[],
+    pivotValueOrder?: string[]
+  ): Promise<OperationResult> {
    try {
      logger.info(`[QuickActionService] 调用Pivot API: ${params.indexColumn} × ${params.pivotColumn}`);
      
+      // ✨ 传递columnMapping、未选择列处理参数和列顺序给Python
      const response = await axios.post(`${PYTHON_SERVICE_URL}/api/operations/pivot`, {
        data,
        index_column: params.indexColumn,
        pivot_column: params.pivotColumn,
        value_columns: params.valueColumns,
        aggfunc: params.aggfunc,
+        column_mapping: columnMapping || [], // ✨ 传递列名映射
+        keep_unused_columns: params.keepUnusedColumns || false,  // ✨ 是否保留未选择的列
+        unused_agg_method: params.unusedAggMethod || 'first',  // ✨ 未选择列的聚合方式
+        original_column_order: originalColumnOrder || [],  // ✨ 原始列顺序
+        pivot_value_order: pivotValueOrder || [],  // ✨ 透视列值的原始顺序
      }, {
        timeout: 60000,
      });
--- a/docs/03-业务模块/DC-数据清洗整理/04-开发计划/工具C_NA处理功能开发总结.md
+++ b/docs/03-业务模块/DC-数据清洗整理/04-开发计划/工具C_NA处理功能开发总结.md
@@ -0,0 +1,339 @@
+# 工具C - NA处理功能开发总结
+
+## 📋 概述
+
+**目标**：在4个核心功能中添加对NA（空值/缺失值）的显式处理，让用户能够明确看到并处理缺失值。
+
+**NA显示名称**：`空值/NA`（中英文结合；后端返回的唯一值列表中以字符串 `<空值/NA>` 作为NA标记）
+
+---
+
+## ✅ 已完成：Python后端（100%）
+
+### 1. recode.py - 数值映射 ✅
+
+**新增参数**：
+- `na_handling`: 'keep' | 'map' | 'drop'
+  - `keep`: 保持为NA（默认）
+  - `map`: 映射为指定值
+  - `drop`: 删除包含NA的行
+- `na_value`: NA映射值（当na_handling='map'时使用）
+
+**实现逻辑**：
+```python
+if original_na_count > 0:
+    na_mask = result[column].isna()
+    
+    if na_handling == 'keep':
+        # 保持为NA（已经是NA，无需操作）
+        print(f'📊 NA处理：保持为NA（{original_na_count}个）')
+    
+    elif na_handling == 'map':
+        # 映射为指定值
+        result.loc[na_mask, target_column] = na_value
+        print(f'📊 NA处理：映射为 {na_value}（{original_na_count}个）')
+    
+    elif na_handling == 'drop':
+        # 删除包含NA的行
+        result = result[~na_mask].copy()
+```
+
+### 2. filter.py - 高级筛选 ✅
+
+**已支持**：`is_null` 和 `not_null` 运算符
+
+无需修改，原有代码已经支持！
+
+```python
+elif operator == 'is_null':
+    mask = df[column].isna()
+elif operator == 'not_null':
+    mask = df[column].notna()
+```
+
+### 3. binning.py - 生成分类变量 ✅
+
+**新增参数**：
+- `na_handling`: 'keep' | 'label' | 'assign'
+  - `keep`: 保持为NA（默认）
+  - `label`: 标记为指定标签（如"缺失"）
+  - `assign`: 分配到指定组
+- `na_label`: NA标签（当na_handling='label'时使用）
+- `na_assign_to`: NA分配到的组索引（当na_handling='assign'时使用）
+
+**实现逻辑**：
+```python
+if original_na_count > 0:
+    na_mask = result[column].isna()
+    
+    if na_handling == 'keep':
+        # 保持为NA
+        print(f'📊 NA处理：保持为NA（{original_na_count}个）')
+    
+    elif na_handling == 'label':
+        # 标记为指定标签
+        label_to_use = na_label if na_label else '空值/NA'
+        result.loc[na_mask, new_column_name] = label_to_use
+        print(f'📊 NA处理：标记为 "{label_to_use}"（{original_na_count}个）')
+    
+    elif na_handling == 'assign':
+        # 分配到指定组
+        if labels and na_assign_to is not None:
+            result.loc[na_mask, new_column_name] = labels[na_assign_to]
+```
+
+### 4. conditional.py - 条件生成列 ✅
+
+**新增支持**：`is_null` 和 `not_null` 运算符
+
+```python
+elif operator == 'is_null':  # ✨ 新增：为空
+    mask = result[column].isna()
+elif operator == 'not_null':  # ✨ 新增：不为空
+    mask = result[column].notna()
+```
+
+### 5. main.py - API请求模型 ✅
+
+**RecodeRequest**：
+```python
+na_handling: str = 'keep'
+na_value: Any = None
+```
+
+**BinningRequest**：
+```python
+na_handling: str = 'keep'
+na_label: str = None
+na_assign_to: int = None
+```
+
+**FilterRequest 和 ConditionalRequest**：
+无需修改，已支持
+
+---
+
+## ✅ 已完成：Node.js后端
+
+### QuickActionService.ts
+
+**已更新的接口**：
+
+1. **RecodeParams**：
+```typescript
+interface RecodeParams {
+  column: string;
+  mapping: Record<string, any>;
+  createNewColumn: boolean;
+  newColumnName?: string;
+  naHandling?: 'keep' | 'map' | 'drop';  // ✨ 新增
+  naValue?: any;  // ✨ 新增
+}
+```
+
+2. **BinningParams**：
+```typescript
+interface BinningParams {
+  column: string;
+  method: 'custom' | 'equal_width' | 'equal_freq';
+  newColumnName: string;
+  bins?: number[];
+  labels?: (string | number)[];
+  numBins?: number;
+  naHandling?: 'keep' | 'label' | 'assign';  // ✨ 新增
+  naLabel?: string;  // ✨ 新增
+  naAssignTo?: number;  // ✨ 新增
+}
+```
+
+**API调用**：在请求体中显式传递 `na_handling`（未指定时默认 `'keep'`）以及 `na_value` / `na_label` / `na_assign_to`
+
+---
+
+## 🎨 已完成：前端UI
+
+### 1. RecodeDialog.tsx - 数值映射
+
+**UI设计**：
+```
+┌─────────────────────────────────────┐
+│ 数值映射                     [X]    │
+├─────────────────────────────────────┤
+│ 选择列：[婚姻状况▼]                 │
+│                                      │
+│ 唯一值映射：                         │
+│ ┌──────────────────────────────┐   │
+│ │ 原始值      → 新值            │   │
+│ │ 已婚        → [1        ]     │   │
+│ │ 未婚        → [0        ]     │   │
+│ │ 空值/NA     → [▼]            │ ⭐│
+│ │   ├─ 保持为NA（默认）         │   │
+│ │   ├─ 映射为：[____]           │   │
+│ │   └─ 删除该行                 │   │
+│ └──────────────────────────────┘   │
+│                                      │
+│ ℹ️ 当前有125个空值（15.6%）        │
+└─────────────────────────────────────┘
+```
+
+**实现要点**：
+1. 调用`/api/v1/dc/tool-c/sessions/:id/unique-values`时，检测是否有NA
+2. 如果有NA，显示"空值/NA"特殊行
+3. 提供3种选择：保持NA / 映射为指定值 / 删除行
+
+### 2. FilterDialog.tsx - 高级筛选
+
+**UI设计**：
+```
+条件：
+[婚姻状况▼] [运算符▼]
+  • 等于
+  • 不等于
+  • 为空          ← ✨ 新增
+  • 不为空        ← ✨ 新增
+  • ...
+```
+
+**实现要点**：
+1. 在运算符下拉菜单中添加"为空"和"不为空"选项
+2. 当选择这两个运算符时，隐藏"值"输入框（不需要输入值）
+
+### 3. BinningDialog.tsx - 生成分类变量
+
+**UI设计**：
+```
+┌─────────────────────────────────────┐
+│ 生成分类变量                 [X]    │
+├─────────────────────────────────────┤
+│ 原始列：[年龄▼]                     │
+│ ...分组规则...                       │
+│                                      │
+│ ⚠️ 空值处理：                       │ ⭐
+│ ⚪ 保持为空（默认）                 │
+│ ⚪ 标记为：[缺失___]                │
+│ ⚪ 分配到组：[第1组▼]               │
+│                                      │
+│ ℹ️ 当前有25个空值（3.1%）          │
+└─────────────────────────────────────┘
+```
+
+**实现要点**：
+1. 添加Radio Group for NA处理方式
+2. 根据选择显示相应的输入框
+3. 传递`naHandling`、`naLabel`、`naAssignTo`参数
+
+### 4. ConditionalDialog.tsx - 条件生成列
+
+**UI设计**：
+```
+规则1：
+如果 [婚姻状况▼] [运算符▼]
+  • 等于
+  • 不等于
+  • 为空          ← ✨ 新增
+  • 不为空        ← ✨ 新增
+  • ...
+则填充：[低风险        ]
+```
+
+**实现要点**：
+1. 与FilterDialog类似，在运算符下拉菜单中添加"为空"和"不为空"
+2. 这两个运算符不需要输入值
+
+---
+
+## 🧪 测试用例
+
+### 测试数据准备
+
+```csv
+ID,婚姻状况,年龄,收缩压
+1,已婚,45,120
+2,未婚,35,130
+3,,50,
+4,离异,60,
+5,,NA,140
+```
+
+> 注：第3、4行的空单元格以及第5行的 `NA` 用于构造缺失值（NA）；CSV 不支持行内注释，请勿在数据行中添加说明文字。
+
+### 测试场景
+
+| 编号 | 功能 | 测试场景 | 预期结果 |
+|------|------|----------|----------|
+| TC-1 | 数值映射 - 保持NA | 婚姻状况：已婚=1，未婚=0，NA=保持 | NA行的新列为NA ✅ |
+| TC-2 | 数值映射 - 映射NA | 婚姻状况：已婚=1，未婚=0，NA=映射为9 | NA行的新列为9 ✅ |
+| TC-3 | 数值映射 - 删除NA | 婚姻状况：已婚=1，未婚=0，NA=删除 | NA行被删除，总行数减少 ✅ |
+| TC-4 | 高级筛选 - 为空 | 筛选"婚姻状况"为空 | 只保留NA行 ✅ |
+| TC-5 | 高级筛选 - 不为空 | 筛选"婚姻状况"不为空 | 只保留非NA行 ✅ |
+| TC-6 | 生成分类变量 - 保持NA | 年龄分组，NA保持 | NA行的新列为NA ✅ |
+| TC-7 | 生成分类变量 - 标记NA | 年龄分组，NA标记为"缺失" | NA行的新列为"缺失" ✅ |
+| TC-8 | 生成分类变量 - 分配NA | 年龄分组，NA分配到第1组 | NA行的新列为第1组标签 ✅ |
+| TC-9 | 条件生成列 - 为空 | 如果婚姻状况为空，则"未知" | NA行的新列为"未知" ✅ |
+| TC-10 | 条件生成列 - 不为空 | 如果婚姻状况不为空，则"已知" | 非NA行的新列为"已知" ✅ |
+
+---
+
+## 📊 开发进度
+
+| 阶段 | 状态 | 备注 |
+|------|------|------|
+| Python后端 - recode.py | ✅ 100% | 已完成 |
+| Python后端 - filter.py | ✅ 100% | 已支持（无需修改） |
+| Python后端 - binning.py | ✅ 100% | 已完成 |
+| Python后端 - conditional.py | ✅ 100% | 已完成 |
+| Python后端 - main.py | ✅ 100% | 已完成 |
+| Node.js后端 | ✅ 100% | 已完成（参数传递） |
+| 前端 - RecodeDialog | ✅ 100% | 已完成（NA处理下拉菜单） |
+| 前端 - FilterDialog | ✅ 100% | 已完成（已支持is_null/not_null） |
+| 前端 - BinningDialog | ✅ 100% | 已完成（NA处理Radio Group） |
+| 前端 - ConditionalDialog | ✅ 100% | 已完成（添加is_null/not_null） |
+| 测试 | ⏳ 待测试 | 等待用户测试验证 |
+
+---
+
+## 🎯 下一步行动
+
+1. **Node.js后端**（✅ 已完成）
+   - 更新RecodeParams接口
+   - 更新BinningParams接口
+   - （FilterParams和ConditionalParams无需修改）
+
+2. **前端UI**（✅ 已完成）
+   - RecodeDialog：添加NA处理下拉菜单（45分钟）
+   - FilterDialog：添加"为空"/"不为空"运算符（15分钟）
+   - BinningDialog：添加NA处理Radio Group（30分钟）
+   - ConditionalDialog：添加"为空"/"不为空"运算符（30分钟）
+
+3. **测试**（预计30分钟）
+   - 执行10个测试用例
+   - 修复发现的问题
+
+**总计剩余时间：约30分钟（仅剩测试）**
+
+---
+
+## 📝 技术要点
+
+### Python端
+- 使用`df[column].isna()`检测NA
+- 使用`df.loc[mask, col] = value`填充NA
+- 使用`df[~mask]`删除NA行
+- 统计并打印NA处理信息
+
+### 前端
+- 在获取unique values时检测NA
+- 使用`<空值/NA>`作为显示名称
+- 根据用户选择构造请求参数
+- 显示NA统计信息（如"当前有125个空值"）
+
+### 验收标准
+- ✅ 用户能明确看到NA的存在
+- ✅ 用户能选择如何处理NA
+- ✅ 处理后的结果符合用户选择
+- ✅ 所有功能的NA处理方式清晰统一
+
+---
+
+**文档创建时间**：2025-12-09  
+**Python后端开发状态**：✅ 已完成  
+**剩余工作**：测试
+
--- a/docs/03-业务模块/DC-数据清洗整理/04-开发计划/工具C_Pivot列顺序优化总结.md
+++ b/docs/03-业务模块/DC-数据清洗整理/04-开发计划/工具C_Pivot列顺序优化总结.md
@@ -0,0 +1,190 @@
+# 工具C - Pivot列顺序优化总结
+
+## 📋 问题描述
+
+**用户需求**：长宽转换后，列的排序应该与上传文件时的列顺序保持一致。
+
+**当前问题**：系统按字母顺序排列转换后的列，导致顺序与原文件不一致。
+
+---
+
+## 🎯 解决方案：方案A - Python端排序
+
+### 核心思路
+1. Node.js后端从session获取**原始列顺序**
+2. Node.js后端从数据中提取**透视列值的原始顺序**（按首次出现顺序）
+3. 传递给Python
+4. Python在pivot后，按原始顺序重排列
+
+---
+
+## 🛠️ 实现细节
+
+### 1. Python端（pivot.py）
+
+**新增参数**：
+- `original_column_order: List[str]`：原始列顺序（如`['Record ID', 'Event Name', 'FMA', '体重', '收缩压', ...]`）
+- `pivot_value_order: List[str]`：透视列值的原始顺序（如`['基线', '1个月', '2个月', ...]`）
+
+**排序逻辑**：
+```python
+if original_column_order:
+    # 1. 索引列始终在最前面
+    final_cols = [index_column]
+    
+    # 2. 按原始列顺序添加转换后的列
+    for orig_col in original_column_order:
+        if orig_col in value_columns:
+            # 找出所有属于这个原列的新列
+            related_cols = [c for c in df_pivot.columns if c.startswith(f'{orig_col}___')]
+            
+            # ✨ 按透视列的原始顺序排序
+            if pivot_value_order:
+                pivot_order_map = {val: idx for idx, val in enumerate(pivot_value_order)}
+                related_cols_sorted = sorted(
+                    related_cols,
+                    key=lambda c: pivot_order_map.get(c.split('___')[1], 999)
+                )
+            else:
+                related_cols_sorted = sorted(related_cols)
+            
+            final_cols.extend(related_cols_sorted)
+    
+    # 3. 添加未选择的列（保持原始顺序）
+    if keep_unused_columns:
+        for orig_col in original_column_order:
+            if orig_col in df_pivot.columns and orig_col not in final_cols:
+                final_cols.append(orig_col)
+    
+    # 4. 重排列
+    df_pivot = df_pivot[final_cols]
+```
+
+### 2. Python端（main.py）
+
+**PivotRequest模型**：
+```python
+class PivotRequest(BaseModel):
+    # ... 原有字段 ...
+    original_column_order: List[str] = []  # ✨ 新增
+    pivot_value_order: List[str] = []  # ✨ 新增
+```
+
+**调用pivot_long_to_wide**：
+```python
+result_df = pivot_long_to_wide(
+    df,
+    request.index_column,
+    request.pivot_column,
+    request.value_columns,
+    request.aggfunc,
+    request.column_mapping,
+    request.keep_unused_columns,
+    request.unused_agg_method,
+    request.original_column_order,  # ✨ 新增
+    request.pivot_value_order  # ✨ 新增
+)
+```
+
+### 3. Node.js后端（QuickActionController.ts）
+
+**获取原始列顺序**：
+```typescript
+const originalColumnOrder = session.columns || [];
+```
+
+**获取透视列值的原始顺序**：
+```typescript
+const pivotColumn = params.pivotColumn;
+const seenPivotValues = new Set();
+const pivotValueOrder: string[] = [];
+
+for (const row of fullData) {
+  const pivotValue = row[pivotColumn];
+  if (pivotValue !== null && pivotValue !== undefined && !seenPivotValues.has(pivotValue)) {
+    seenPivotValues.add(pivotValue);
+    pivotValueOrder.push(String(pivotValue));
+  }
+}
+```
+
+**传递给QuickActionService**：
+```typescript
+executeResult = await quickActionService.executePivot(
+  fullData, 
+  params, 
+  session.columnMapping,
+  originalColumnOrder,  // ✨ 新增
+  pivotValueOrder  // ✨ 新增
+);
+```
+
+### 4. Node.js后端（QuickActionService.ts）
+
+**方法签名**：
+```typescript
+async executePivot(
+  data: any[], 
+  params: PivotParams, 
+  columnMapping?: any[], 
+  originalColumnOrder?: string[],  // ✨ 新增
+  pivotValueOrder?: string[]  // ✨ 新增
+): Promise<OperationResult>
+```
+
+**传递给Python**：
+```typescript
+const response = await axios.post(`${PYTHON_SERVICE_URL}/api/operations/pivot`, {
+  // ... 原有参数 ...
+  original_column_order: originalColumnOrder || [],  // ✨ 新增
+  pivot_value_order: pivotValueOrder || [],  // ✨ 新增
+});
+```
+
+---
+
+## 📊 效果对比
+
+### 修改前（按字母顺序）
+```
+Record ID | FMA___基线 | FMA___1个月 | 收缩压___基线 | 收缩压___1个月 | 体重___基线 | 体重___1个月
+    ↑          ↑             ↑              ↑               ↑            ↑           ↑
+  索引列      F开头        F开头           S开头(拼音)     S开头        T开头       T开头
+```
+
+### 修改后（按原始顺序）
+```
+Record ID | FMA___基线 | FMA___1个月 | 体重___基线 | 体重___1个月 | 收缩压___基线 | 收缩压___1个月
+    ↑          ↑             ↑            ↑             ↑             ↑              ↑
+  索引列    原文件第3列   原文件第3列   原文件第4列   原文件第4列   原文件第5列    原文件第5列
+```
+
+### 透视值内部顺序（按原始出现顺序）
+```
+FMA___基线 | FMA___1个月 | FMA___2个月
+    ↑           ↑             ↑
+  首次出现    第二次出现    第三次出现
+（而不是按"1个月"、"2个月"、"基线"的字母顺序）
+```
+
+---
+
+## ✅ 开发完成
+
+### 修改文件清单
+1. ✅ `extraction_service/operations/pivot.py`
+2. ✅ `extraction_service/main.py`
+3. ✅ `backend/src/modules/dc/tool-c/controllers/QuickActionController.ts`
+4. ✅ `backend/src/modules/dc/tool-c/services/QuickActionService.ts`
+
+### 优势
+- ✅ 列顺序与原文件一致（用户熟悉）
+- ✅ 透视值顺序按数据中首次出现的顺序（例如 基线→1个月→2个月），而非字母顺序
+- ✅ 未选择的列也保持原始顺序
+- ✅ 导出Excel时顺序正确
+
+---
+
+**开发时间**：2025-12-09  
+**状态**：✅ 已完成，等待测试
+
--- a/docs/03-业务模块/DC-数据清洗整理/04-开发计划/工具C_方案B实施总结_2025-12-09.md
+++ b/docs/03-业务模块/DC-数据清洗整理/04-开发计划/工具C_方案B实施总结_2025-12-09.md
@@ -0,0 +1,348 @@
+# 工具C - 方案B实施总结（列名特殊字符解决方案）
+
+> **日期**: 2025-12-09  
+> **版本**: v1.0  
+> **实施方案**: 方案B - Python负责列名替换
+
+---
+
+## 📋 问题背景
+
+### 原始问题
+
+用户上传的Excel文件表头包含特殊字符，导致计算列功能失败：
+
+**示例表头**:
+- `体重（kg）`
+- `1.高血压病（无=0，有=1，不知道=2）`
+- `身高（cm）`
+
+**报错信息**:
+```
+计算列失败：公式验证失败: 公式包含不允许的字符
+```
+
+---
+
+## 🎯 方案选择
+
+### 方案对比
+
+| 方案 | 描述 | 优点 | 缺点 | 评分 |
+|------|------|------|------|------|
+| **方案A** | 用户使用序号引用（col_0, col_1） | 技术最安全 | 用户体验差，不直观 | ⭐⭐ |
+| **方案B** | 用户使用原列名，Python负责替换 | 用户体验好，技术可靠 | 需实现替换逻辑 | ⭐⭐⭐⭐⭐ |
+| **方案C** | 前端替换列名 | 减少网络传输 | 边界识别困难，不可靠 | ⭐⭐⭐ |
+
+**最终选择**: **方案B** ✅
+
+---
+
+## 🏗️ 架构设计
+
+### 数据流
+
+```
+用户输入公式（原列名）
+    ↓
+前端：体重（kg） / (身高（cm）/100)**2
+    ↓
+后端：获取 columnMapping
+    ↓
+传递给Python: {
+  formula: "体重（kg） / (身高（cm）/100)**2",
+  column_mapping: [
+    {"originalName": "体重（kg）", "safeName": "col_0"},
+    {"originalName": "身高（cm）", "safeName": "col_1"}
+  ]
+}
+    ↓
+Python替换: col_0 / (col_1/100)**2
+    ↓
+执行计算 ✅
+```
+
+### 职责划分
+
+| 层级 | 职责 | 关键点 |
+|------|------|--------|
+| **前端** | UI交互、数据收集 | 用户看到和输入原列名 |
+| **后端** | 获取columnMapping、传递给Python | 从Session获取映射 |
+| **Python** | 列名替换、公式执行 | 按长度排序、精确替换 |
+
+---
+
+## 💻 实施细节
+
+### 1. 前端（ComputeDialog.tsx）
+
+**保持不变** - 已经使用原列名方式
+
+```typescript
+// 用户点击列名标签，插入到公式框
+<Tag onClick={() => setFormula(formula + col.name)}>
+  {col.name}  {/* 显示原列名：体重（kg） */}
+</Tag>
+
+// 提交时直接传递原公式
+onApply({
+  newColumnName: "BMI",
+  formula: "体重（kg） / (身高（cm）/100)**2",  // 原列名
+});
+```
+
+### 2. 后端（QuickActionController.ts）
+
+**修改**: 获取session并传递columnMapping
+
+```typescript
+// 获取session（包含columnMapping）
+session = await sessionService.getSession(sessionId);
+
+// 传递给QuickActionService
+executeResult = await quickActionService.executeCompute(
+  fullData, 
+  params, 
+  session.columnMapping  // ✅ 传递映射
+);
+```
+
+### 3. 后端（QuickActionService.ts）
+
+**修改**: 接收并传递columnMapping给Python
+
+```typescript
+async executeCompute(
+  data: any[], 
+  params: ComputeParams, 
+  columnMapping?: any[]  // ✅ 新增参数
+): Promise<OperationResult> {
+  const response = await axios.post(`${PYTHON_SERVICE_URL}/api/operations/compute`, {
+    data,
+    new_column_name: params.newColumnName,
+    formula: params.formula,
+    column_mapping: columnMapping || [],  // ✅ 传递映射
+  });
+  
+  return response.data;
+}
+```
+
+### 4. Python（main.py）
+
+**修改**: 更新请求模型
+
+```python
+class ComputeRequest(BaseModel):
+    data: List[Dict[str, Any]]
+    new_column_name: str
+    formula: str
+    column_mapping: List[Dict[str, str]] = []  # ✅ 新增字段
+
+@app.post("/api/operations/compute")
+async def operation_compute(request: ComputeRequest):
+    result_df = compute_column(
+        df,
+        request.new_column_name,
+        request.formula,
+        request.column_mapping  # ✅ 传递映射
+    )
+```
+
+### 5. Python（compute.py）
+
+**核心实现**: 列名替换逻辑
+
+```python
+def replace_column_names_in_formula(
+    formula: str, 
+    column_mapping: List[Dict[str, str]]
+) -> str:
+    """
+    ✅ 核心算法：可靠的列名替换
+    """
+    safe_formula = formula
+    
+    # 关键1：按列名长度倒序排序
+    # 避免子串问题：先替换"高血压病史"，再替换"高血压"
+    sorted_mapping = sorted(
+        column_mapping,
+        key=lambda x: len(x['originalName']),
+        reverse=True
+    )
+    
+    # 关键2：逐个精确替换（不使用正则）
+    for item in sorted_mapping:
+        original = item['originalName']
+        safe = item['safeName']
+        
+        if original in safe_formula:
+            safe_formula = safe_formula.replace(original, safe)
+    
+    return safe_formula
+
+def compute_column(
+    df: pd.DataFrame,
+    new_column_name: str,
+    formula: str,
+    column_mapping: Optional[List[Dict[str, str]]] = None
+) -> pd.DataFrame:
+    """
+    ✅ 方案B：Python负责替换
+    """
+    # 1. 替换列名
+    if column_mapping:
+        safe_formula = replace_column_names_in_formula(formula, column_mapping)
+    else:
+        safe_formula = formula
+    
+    # 2. 准备执行环境
+    env = {}
+    for item in column_mapping:
+        env[item['safeName']] = df[item['originalName']]
+    env.update(ALLOWED_FUNCTIONS)
+    
+    # 3. 执行（不需要字符验证！）
+    result = eval(safe_formula, {"__builtins__": {}}, env)
+    
+    return df.assign(**{new_column_name: result})
+```
+
+---
+
+## ✅ 解决的问题
+
+### 1. 特殊字符问题 ✅
+- **问题**: `体重（kg）` 包含中文括号
+- **解决**: Python使用安全列名 `col_0`，不受特殊字符影响
+
+### 2. 子串包含问题 ✅
+- **问题**: "高血压" 和 "高血压病史" 可能误替换
+- **解决**: 按长度倒序排序，先替换长列名
+
+### 3. 边界识别问题 ✅
+- **问题**: 正则`\b`对中文字符不可靠
+- **解决**: 使用Python字符串`replace`，简单可靠
+
+### 4. 字符白名单问题 ✅
+- **问题**: 需要枚举所有允许的字符
+- **解决**: 不再需要维护列名字符白名单——替换后公式中的列引用均为安全列名。注意：公式的其余部分仍是用户输入，仅将 `__builtins__` 置空并不能使 `eval` 完全安全，执行前仍应对替换后的公式做语法/AST层面的校验
+
+---
+
+## 🧪 测试用例
+
+### 测试1：基本功能
+```python
+column_mapping = [
+    {"originalName": "体重（kg）", "safeName": "col_0"},
+    {"originalName": "身高（cm）", "safeName": "col_1"}
+]
+formula = "体重（kg） / (身高（cm）/100)**2"
+# 预期: col_0 / (col_1/100)**2 ✅
+```
+
+### 测试2：子串包含
+```python
+column_mapping = [
+    {"originalName": "高血压", "safeName": "col_0"},
+    {"originalName": "高血压病史", "safeName": "col_1"}
+]
+formula = "高血压病史 + 高血压"
+# 预期: col_1 + col_0 ✅（因为按长度排序）
+```
+
+### 测试3：复杂特殊字符
+```python
+column_mapping = [
+    {"originalName": "1.高血压病（无=0，有=1，不知道=2）", "safeName": "col_0"}
+]
+formula = "1.高血压病（无=0，有=1，不知道=2） * 2"
+# 预期: col_0 * 2 ✅
+```
+
+### 测试4：括号内含数字范围
+```python
+column_mapping = [
+    {"originalName": "FMA总分（0-100）", "safeName": "col_0"}
+]
+formula = "FMA总分（0-100） / 100"
+# 预期: col_0 / 100 ✅
+```
+
+---
+
+## 📊 性能影响
+
+| 指标 | 影响 | 说明 |
+|------|------|------|
+| **网络传输** | +5KB | columnMapping约5KB（100列） |
+| **替换时间** | <1ms | 字符串替换非常快 |
+| **总体性能** | 可忽略 | 相比数据处理时间（秒级）可忽略 |
+
+---
+
+## 🎯 优势总结
+
+### 用户体验 ⭐⭐⭐⭐⭐
+- ✅ 用户看到和输入原列名
+- ✅ 公式直观易懂
+- ✅ 历史记录清晰
+
+### 技术可靠性 ⭐⭐⭐⭐⭐
+- ✅ 不依赖正则边界识别
+- ✅ 按长度排序避免子串问题
+- ✅ Python字符串操作简单可靠
+
+### 可维护性 ⭐⭐⭐⭐⭐
+- ✅ 职责清晰（前端UI、Python逻辑）
+- ✅ 易于调试（可打印替换日志）
+- ✅ 未来不会再有字符问题
+
+---
+
+## 📝 后续工作
+
+### 已完成 ✅
+- [x] 前端保持使用原列名
+- [x] 后端传递columnMapping
+- [x] Python实现替换逻辑
+- [x] 移除字符验证
+- [x] 更新Pivot操作
+
+### 待测试 ⏳
+- [ ] 用户实际测试
+- [ ] 边界情况验证
+- [ ] 性能测试
+
+### 未来优化 💡
+- [ ] 添加公式语法高亮
+- [ ] 列名自动补全
+- [ ] 公式错误提示优化
+
+---
+
+## 🔗 相关文件
+
+### 修改的文件
+1. `backend/src/modules/dc/tool-c/controllers/QuickActionController.ts`
+2. `backend/src/modules/dc/tool-c/services/QuickActionService.ts`
+3. `extraction_service/main.py`
+4. `extraction_service/operations/compute.py`
+5. `extraction_service/operations/pivot.py`
+
+### 文档
+- 本文档：`工具C_方案B实施总结_2025-12-09.md`
+- 原Bug报告：`工具C_Bug修复总结_2025-12-08.md`
+
+---
+
+## ✨ 总结
+
+方案B成功实现了：
+1. **用户体验优秀** - 使用原列名，直观易懂
+2. **技术可靠** - Python替换，简单可控
+3. **彻底解决** - 不再有特殊字符问题
+
+**下一步**: 等待用户测试验证 ✅
+
--- a/docs/03-业务模块/DC-数据清洗整理/04-开发计划/工具C_缺失值处理功能开发计划.md
+++ b/docs/03-业务模块/DC-数据清洗整理/04-开发计划/工具C_缺失值处理功能开发计划.md
@@ -0,0 +1,697 @@
+# 工具C - 缺失值处理功能开发计划
+
+## 📋 概述
+
+**目标**：将现有的"删除缺失值"功能升级为综合的"缺失值处理"功能，包括删除、填补、高级填补三种策略。
+
+**设计方案**：方案B - 合并对话框 + Tab切换
+
+**核心原则**：
+- ✅ **填补操作创建新列**（保留原始数据，便于对比）
+- ✅ **新列紧邻原列**（方便用户查看和比较）
+- ✅ **MICE功能必须实现**（医学研究核心需求）
+- ✅ **无需撤销功能**（原始数据未被修改）
+
+---
+
+## 🎯 功能需求
+
+### Phase 1：必备功能（本次开发）
+
+#### Tab 1：删除缺失值 ✅ 已有
+- 保留现有功能
+- 删除包含缺失值的行
+- 删除缺失率过高的列
+
+#### Tab 2：填补缺失值 ⭐ 新增
+1. **均值填补**（Mean Imputation）
+   - 适用于：数值型变量，正态分布
+   - 实现：创建新列，填充均值（将 `df[column].fillna(df[column].mean())` 的结果写入新列，原列不变）
+
+2. **中位数填补**（Median Imputation）
+   - 适用于：数值型变量，偏态分布
+   - 实现：创建新列，填充中位数
+
+3. **众数填补**（Mode Imputation）
+   - 适用于：分类变量、离散型数值
+   - 实现：创建新列，填充众数
+
+4. **固定值填补**（Constant Imputation）
+   - 适用于：任何类型，用户指定值
+   - 实现：创建新列，填充指定值
+
+**注意**：所有填补方法都会创建新列（如`体重_填补`），新列紧邻原列，便于对比验证。
+
+#### Tab 3：高级填补 ⭐ 新增
+1. **MICE多重插补**（Multivariate Imputation by Chained Equations）
+   - 适用于：缺失率5%-30%，需要考虑变量间关系
+   - 实现：使用 `sklearn.impute.IterativeImputer`
+
+### Phase 2：未来扩展（本次不开发）
+- 前向/后向填充（Forward/Backward Fill）
+- 分组填补（Grouped Imputation）
+- 线性插值（Linear Interpolation）
+- KNN填补（KNN Imputation）
+
+---
+
+## 🎨 UI设计
+
+### 1. 按钮重命名
+**原**：`[删除缺失值]`  
+**新**：`[缺失值处理]`
+
+### 2. 对话框结构
+
+```
+┌───────────────────────────────────────────────────────┐
+│  缺失值处理                                    [X]    │
+├───────────────────────────────────────────────────────┤
+│  ┌────────┬────────┬──────────┐                      │
+│  │ 删除   │ 填补   │ 高级填补 │  ← Ant Design Tabs   │
+│  └────────┴────────┴──────────┘                      │
+├───────────────────────────────────────────────────────┤
+│                                                        │
+│  【Tab内容区域】                                       │
+│                                                        │
+│                                                        │
+│                    [取消]    [执行处理]               │
+└───────────────────────────────────────────────────────┘
+```
+
+### 3. Tab 2（填补）详细设计
+
+```
+┌───────────────────────────────────────────────────────┐
+│  【Tab 2: 填补缺失值】                                 │
+│                                                        │
+│  原始列：[体重（kg）▼]                                │
+│           ⚠️ 仅支持单列填补                           │
+│                                                        │
+│  新列名：[体重_填补           ] ← 用户可修改          │
+│          💡 新列将创建在原列旁边，便于对比            │
+│                                                        │
+│  📊 缺失值统计：                                      │
+│  ┌──────────────────────────────────────────────┐   │
+│  │ • 当前缺失：125个（15.6%）                   │   │
+│  │ • 有效值：675个（84.4%）                     │   │
+│  │ • 数据类型：数值型                           │   │
+│  │ • 有效值范围：45.2 - 98.5 kg               │   │
+│  │ • 有效值均值：70.3 kg                        │   │
+│  │ • 有效值中位数：68.5 kg                      │   │
+│  │ • 推荐方法：中位数填补（数据偏态）⭐         │   │
+│  └──────────────────────────────────────────────┘   │
+│                                                        │
+│  填补方法：                                            │
+│  ⚪ 均值填补（适合正态分布的数值变量）                │
+│  ⚪ 中位数填补（适合偏态分布的数值变量）⭐            │
+│  ⚪ 众数填补（适合分类变量或离散数值）                │
+│  ⚪ 固定值填补：[_______] ← 用户输入                 │
+│                                                        │
+│  📈 填补预览：                                        │
+│  ┌──────────────────────────────────────────────┐   │
+│  │ • 填补值：68.5 kg                            │   │
+│  │ • 填补后均值：70.2 kg（原70.3 kg）          │   │
+│  │ • 填补后标准差：10.8 kg（原12.5 kg）        │   │
+│  │ • 将创建新列："体重_填补"                   │   │
+│  │ • 原列"体重（kg）"保持不变 ✅               │   │
+│  └──────────────────────────────────────────────┘   │
+│                                                        │
+│  ✅ 优势：原始数据保留，可随时对比验证                │
+│                                                        │
+│                    [取消]    [执行填补]               │
+└───────────────────────────────────────────────────────┘
+```
+
+### 4. Tab 3（高级填补）详细设计
+
+```
+┌───────────────────────────────────────────────────────┐
+│  【Tab 3: 高级填补 - MICE多重插补】⭐ 必须实现        │
+│                                                        │
+│  ⭐ MICE多重插补                                       │
+│  （Multivariate Imputation by Chained Equations）     │
+│                                                        │
+│  选择要填补的列（可多选）：                            │
+│  ┌──────────────────────────────────────────────┐   │
+│  │ ☑ 体重（kg）         缺失：125（15.6%）     │   │
+│  │ ☑ 收缩压（mmHg）     缺失：82（10.3%）      │   │
+│  │ ☐ BMI               缺失：3（0.4%）         │   │
+│  │ ☐ 舒张压（mmHg）     缺失：0（0%）          │   │
+│  └──────────────────────────────────────────────┘   │
+│                                                        │
+│  新列命名规则：                                        │
+│  ☑ 自动命名：原列名 + "_MICE"                         │
+│     示例：体重（kg） → 体重（kg）_MICE                │
+│            收缩压（mmHg） → 收缩压（mmHg）_MICE       │
+│                                                        │
+│  参数设置：                                            │
+│  迭代次数：[10▼] （默认10次，范围5-50）               │
+│  随机种子：[42  ] （确保结果可重复）                  │
+│                                                        │
+│  📊 MICE说明：                                        │
+│  ┌──────────────────────────────────────────────┐   │
+│  │ MICE会根据其他变量的值来预测缺失值。         │   │
+│  │                                               │   │
+│  │ ✅ 适用场景：                                │   │
+│  │   • 缺失率5%-30%                             │   │
+│  │   • 需要考虑变量间的相关性                   │   │
+│  │   • 多个变量同时有缺失                       │   │
+│  │   • 医学研究高质量填补的首选方法 ⭐          │   │
+│  │                                               │   │
+│  │ ⚠️ 注意：                                    │   │
+│  │   • 计算时间较长（10万行约1分钟）            │   │
+│  │   • 需要足够的有效样本（建议>50%有效）       │   │
+│  │   • 新列将创建在各原列旁边，便于对比         │   │
+│  └──────────────────────────────────────────────┘   │
+│                                                        │
+│  💡 新列位置：每个新列紧邻其原列，便于逐列验证        │
+│                                                        │
+│                    [取消]    [执行MICE填补]           │
+└───────────────────────────────────────────────────────┘
+```
+
+---
+
+## 🛠️ 技术实现方案
+
+### 1. Python端（extraction_service）
+
+#### 新增文件：`operations/fillna.py`
+
+```python
+"""
+缺失值填补 - 预写函数
+支持均值、中位数、众数、固定值、MICE填补
+"""
+
+import pandas as pd
+import numpy as np
+from typing import Literal, Optional, List, Union, Any
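+# IterativeImputer 仍是 scikit-learn 的实验性 API，必须先导入 enable_iterative_imputer 才能使用
+from sklearn.experimental import enable_iterative_imputer  # noqa: F401
+from sklearn.impute import IterativeImputer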
+
+
+def fillna_simple(
+    df: pd.DataFrame,
+    column: str,
+    new_column_name: str,
+    method: Literal['mean', 'median', 'mode', 'constant'],
+    fill_value: Any = None
+) -> dict:
+    """
+    简单填补缺失值（创建新列）
+    
+    Args:
+        df: 输入数据框
+        column: 原始列名
+        new_column_name: 新列名（如"体重_填补"）
+        method: 填补方法
+            - 'mean': 均值填补
+            - 'median': 中位数填补
+            - 'mode': 众数填补
+            - 'constant': 固定值填补
+        fill_value: 固定值（method='constant'时必填）
+    
+    Returns:
+        {
+            'df': 包含新列的数据框（新列紧邻原列）,
+            'stats': {
+                'original_column': 原列名,
+                'new_column': 新列名,
+                'missing_before': 缺失数量,
+                'fill_value': 填补的值,
+                'mean_after': 填补后均值,
+                'std_after': 填补后标准差
+            }
+        }
+    
+    实现细节：
+        1. 复制原列数据
+        2. 执行填补
+        3. 使用 df.insert() 将新列插入到原列旁边
+        4. 返回包含新列的完整数据框
+    """
+    
+def get_column_missing_stats(
+    df: pd.DataFrame,
+    column: str
+) -> dict:
+    """
+    获取列的缺失值统计信息
+    
+    Returns:
+        {
+            'missing_count': 缺失数量,
+            'missing_rate': 缺失率,
+            'valid_count': 有效值数量,
+            'data_type': 数据类型,
+            'value_range': [min, max],  # 仅数值型
+            'mean': 均值,               # 仅数值型
+            'median': 中位数,           # 仅数值型
+            'mode': 众数,
+            'recommended_method': 推荐的填补方法
+        }
+    """
+    
+def fillna_mice(
+    df: pd.DataFrame,
+    columns: List[str],
+    n_iterations: int = 10,
+    random_state: int = 42
+) -> dict:
+    """
+    MICE多重插补（创建新列）⭐ 必须实现
+    
+    Args:
+        df: 输入数据框
+        columns: 要填补的列名列表（如["体重（kg）", "收缩压（mmHg）"]）
+        n_iterations: 迭代次数（默认10，范围5-50）
+        random_state: 随机种子（默认42，确保结果可重复）
+    
+    Returns:
+        {
+            'df': 包含所有新列的数据框（每个新列紧邻其原列）,
+            'stats': {
+                column: {
+                    'original_column': 原列名,
+                    'new_column': 新列名（原名_MICE）,
+                    'missing_before': 缺失数量,
+                    'filled_count': 填补数量,
+                    'mean_before': 填补前均值,
+                    'mean_after': 填补后均值
+                }
+            }
+        }
+    
+    实现细节：
+        1. 对所选列执行MICE填补
+        2. 为每列创建新列（命名：原列名_MICE）
+        3. 使用 df.insert() 将每个新列插入到其原列旁边
+        4. 返回包含所有新列的完整数据框
+    
+    示例：
+        原列：体重（kg）、收缩压（mmHg）
+        新列：体重（kg）_MICE、收缩压（mmHg）_MICE
+        结果顺序：体重（kg）、体重（kg）_MICE、收缩压（mmHg）、收缩压（mmHg）_MICE、...
+    """
+```
+
+#### 修改文件：`main.py`
+
+```python
+# 新增API端点
+
+@app.post("/fillna-simple")
+async def operation_fillna_simple(request: FillnaSimpleRequest):
+    """简单填补缺失值"""
+    
+@app.post("/fillna-stats")
+async def get_fillna_stats(request: FillnaStatsRequest):
+    """获取列的缺失值统计"""
+    
+@app.post("/fillna-mice")
+async def operation_fillna_mice(request: FillnaMiceRequest):
+    """MICE多重插补"""
+```
+
+### 2. Node.js后端（backend）
+
+#### 修改文件：`services/QuickActionService.ts`
+
+```typescript
+// 新增方法
+
+async executeFillnaSimple(params: {
+  sessionId: string;
+  column: string;
+  newColumnName: string;  // 新列名（对应Python端的 new_column_name）
+  method: 'mean' | 'median' | 'mode' | 'constant';
+  fillValue?: any;
+}): Promise<any>
+
+async getFillnaStats(params: {
+  sessionId: string;
+  column: string;
+}): Promise<any>
+
+async executeFillnaMice(params: {
+  sessionId: string;
+  columns: string[];
+  nIterations: number;
+  randomState?: number;  // 随机种子（对应Python端的 random_state，默认42）
+}): Promise<any>
+```
+
+#### 修改文件：`controllers/QuickActionController.ts`
+
+```typescript
+// 新增处理方法
+
+async handleFillnaSimple(request, reply)
+async getFillnaStats(request, reply)
+async handleFillnaMice(request, reply)
+```
+
+### 3. 前端（frontend-v2）
+
+#### 重命名文件
+- `DropnaDialog.tsx` → `MissingValueDialog.tsx`
+
+#### 修改文件：`MissingValueDialog.tsx`
+
+```typescript
+interface MissingValueDialogProps {
+  open: boolean;
+  onClose: () => void;
+  sessionId: string;
+  columns: Array<{ id: string; name: string; type?: string }>;
+  onSuccess: () => void;
+}
+
+// 新增状态
+const [activeTab, setActiveTab] = useState<'delete' | 'fill' | 'mice'>('fill');
+const [selectedColumn, setSelectedColumn] = useState<string>('');
+const [fillMethod, setFillMethod] = useState<'mean' | 'median' | 'mode' | 'constant'>('median');
+const [fillValue, setFillValue] = useState<any>(null);
+const [columnStats, setColumnStats] = useState<any>(null);
+
+// Tab 1: 删除（保留原有逻辑）
+// Tab 2: 填补（新增）
+// Tab 3: MICE（新增）
+```
+
+#### 修改文件：`index.tsx`
+
+```typescript
+// 更新按钮组
+const actionButtons = [
+  // ...
+  {
+    key: 'missing',
+    icon: <DeleteOutlined />,
+    label: '缺失值处理',  // ← 重命名
+    onClick: () => setMissingValueDialogOpen(true),
+  },
+  // ...
+];
+```
+
+---
+
+## 📂 文件修改清单
+
+### 新增文件
+1. `extraction_service/operations/fillna.py` - 填补功能实现
+2. `docs/03-业务模块/DC-数据清洗整理/04-开发计划/工具C_缺失值处理功能开发计划.md` - 本文档
+
+### 修改文件
+
+#### Python端
+1. `extraction_service/main.py`
+   - 新增 `/fillna-simple` 端点
+   - 新增 `/fillna-stats` 端点
+   - 新增 `/fillna-mice` 端点
+
+#### Node.js后端
+2. `backend/src/modules/dc/tool-c/services/QuickActionService.ts`
+   - 新增 `executeFillnaSimple` 方法
+   - 新增 `getFillnaStats` 方法
+   - 新增 `executeFillnaMice` 方法
+
+3. `backend/src/modules/dc/tool-c/controllers/QuickActionController.ts`
+   - 新增 `handleFillnaSimple` 处理方法
+   - 新增 `getFillnaStats` 处理方法
+   - 新增 `handleFillnaMice` 处理方法
+
+#### 前端
+4. `frontend-v2/src/modules/dc/pages/tool-c/components/DropnaDialog.tsx`
+   - **重命名为** `MissingValueDialog.tsx`
+   - 新增 Tabs 组件（删除/填补/高级填补）
+   - Tab 1: 保留原有删除功能
+   - Tab 2: 新增简单填补功能（均值/中位数/众数/固定值）
+   - Tab 3: 新增MICE填补功能
+
+5. `frontend-v2/src/modules/dc/pages/tool-c/index.tsx`
+   - 更新按钮标签：`删除缺失值` → `缺失值处理`
+   - 更新 Dialog 组件引用
+
+6. `frontend-v2/src/modules/dc/api/index.ts`
+   - 新增 `fillnaSimple` API
+   - 新增 `getFillnaStats` API
+   - 新增 `fillnaMice` API
+
+---
+
+## 🔄 开发步骤
+
+### Step 1: Python端基础功能（30min）
+1. 创建 `fillna.py`
+2. 实现 `fillna_simple` 函数
+3. 实现 `get_column_missing_stats` 函数
+4. 在 `main.py` 添加对应端点
+5. 测试：使用Postman或curl测试API
+
+### Step 2: Python端高级功能（30min）
+1. 实现 `fillna_mice` 函数
+2. 在 `main.py` 添加对应端点
+3. 测试：使用Postman测试MICE功能
+
+### Step 3: Node.js后端（20min）
+1. 修改 `QuickActionService.ts`
+2. 修改 `QuickActionController.ts`
+3. 测试：确保API转发正常
+
+### Step 4: 前端UI重构（40min）
+1. 重命名 `DropnaDialog.tsx` → `MissingValueDialog.tsx`
+2. 实现Tabs结构
+3. Tab 1: 迁移原有删除功能
+4. Tab 2: 实现简单填补UI
+5. Tab 3: 实现MICE填补UI
+6. 更新 `index.tsx` 中的引用和按钮标签
+
+### Step 5: 前端API集成（20min）
+1. 在 `api/index.ts` 添加新API
+2. 集成到 `MissingValueDialog.tsx`
+3. 实现实时统计获取
+4. 实现填补预览
+
+### Step 6: 端到端测试（30min）
+1. 测试均值填补
+2. 测试中位数填补
+3. 测试众数填补
+4. 测试固定值填补
+5. 测试MICE填补
+6. 测试删除功能（确保未破坏原有功能）
+
+### Step 7: 优化和文档（20min）
+1. 添加错误处理
+2. 优化加载状态
+3. 更新用户提示
+4. 记录开发总结
+
+**总计：约3小时**（初始估算，未含新列插入逻辑与完整MICE实现；更新后的估算约5-6小时，见“📅 时间估算（已更新）”一节）
+
+---
+
+## 🧪 测试计划
+
+### 功能测试用例
+
+#### 测试数据准备
+```
+- 数值列（正态分布）：年龄（缺失15%）
+- 数值列（偏态分布）：体重（缺失20%）
+- 分类列：婚姻状况（缺失10%）
+- 多列缺失：收缩压（15%）+ 舒张压（12%）
+```
+
+#### 测试用例
+
+| 编号 | 功能 | 测试场景 | 预期结果 |
+|------|------|----------|----------|
+| TC-1 | 均值填补 | 对"年龄"列使用均值填补，新列名"年龄_填补" | 创建新列，缺失值被均值填充，原列不变 ✅ |
+| TC-2 | 中位数填补 | 对"体重"列使用中位数填补 | 创建新列，缺失值被中位数填充 ✅ |
+| TC-3 | 众数填补 | 对"婚姻状况"列使用众数填补 | 创建新列，缺失值被众数填充 ✅ |
+| TC-4 | 固定值填补（数值） | 对"年龄"列填充固定值"0" | 创建新列，所有缺失值变为0 ✅ |
+| TC-5 | 固定值填补（文本） | 对"婚姻状况"列填充"未知" | 创建新列，所有缺失值变为"未知" ✅ |
+| TC-6 | MICE填补 | 选择"收缩压"+"舒张压"，执行MICE | 创建2个新列（_MICE后缀），缺失值被预测 ✅ |
+| TC-7 | 新列位置验证 ⭐ | 对"列A"填补，查看新列位置 | 新列"列A_填补"紧邻原列"列A"右侧 ✅ |
+| TC-8 | MICE新列位置 ⭐ | 对"列A"+"列C"执行MICE | 列A_MICE在列A旁，列C_MICE在列C旁 ✅ |
+| TC-9 | 统计信息准确性 | 选择任意列，查看统计信息 | 显示正确的缺失数、均值、中位数等 |
+| TC-10 | 删除功能保留 | Tab 1删除缺失行 | 功能正常，与原功能一致 |
+| TC-11 | 无缺失列处理 | 对完全无缺失的列执行填补 | 提示"该列无缺失值"或复制原列 |
+| TC-12 | 全缺失列处理 | 对全部缺失的列执行填补 | 提示警告，仍创建新列（全部为填补值） |
+| TC-13 | 重复新列名处理 | 新列名已存在 | 自动添加后缀（如"体重_填补_1"）或提示 |
+| TC-14 | 原始数据保留 ⭐ | 填补后，检查原列 | 原列数据完全不变 ✅ |
+
+### 边界测试
+
+| 测试项 | 场景 | 预期 |
+|--------|------|------|
+| 超大数据集 | 10万行数据执行MICE | 显示进度，不崩溃 |
+| 特殊字符列名 | 列名带括号、等号 | 正常处理（使用columnMapping） |
+| 数据类型混合 | 对文本列执行均值填补 | 提示错误或自动跳过 |
+| 并发处理 | 同时打开多个Dialog | 状态隔离，不互相影响 |
+
+---
+
+## 📊 性能要求
+
+| 操作 | 数据量 | 目标响应时间 |
+|------|--------|--------------|
+| 简单填补（均值/中位数/众数） | 1万行 | < 1秒 |
+| 简单填补 | 10万行 | < 5秒 |
+| MICE填补 | 1万行 | < 10秒 |
+| MICE填补 | 10万行 | < 60秒 |
+| 统计信息获取 | 任意 | < 0.5秒 |
+
+---
+
+## 🚨 风险和注意事项
+
+### 1. 数据安全 ✅ 已解决
+- ✅ 填补操作创建新列，原始数据完全保留
+- ✅ 新列紧邻原列，便于对比验证
+- ✅ 无需撤销功能（原始数据未被修改）
+- ✅ 用户可随时删除填补后的列，或重新填补
+
+### 2. MICE性能 ⭐ 重点关注
+- ⚠️ MICE在大数据集上可能很慢（10万行约1分钟）
+- ✅ **必须显示进度条或加载动画**
+- ✅ 添加"预计耗时"提示（基于数据量估算）
+- ✅ 提供"取消执行"按钮（长时间任务）
+- 💡 优化建议：考虑使用Web Worker或后台任务队列
+
+### 3. 数据类型兼容性
+- ⚠️ 均值/中位数只适用于数值列
+- ✅ 需要前端验证列的数据类型
+- ✅ 后端也需要校验并返回友好错误
+
+### 4. 列名特殊字符
+- ⚠️ 列名可能包含特殊字符
+- ✅ 使用现有的 `columnMapping` 机制
+- ✅ 确保与compute列功能一致
+
+### 5. 全部缺失的列
+- ⚠️ 如果列全部为空，均值/中位数为NaN
+- ✅ 需要特殊处理并提示用户
+
+---
+
+## 📝 依赖项
+
+### Python依赖（需要确认）
+```
+pandas >= 1.5.0
+numpy >= 1.23.0
+scikit-learn >= 1.2.0  # ← MICE需要
+```
+
+### 前端依赖
+- 无新增依赖（使用现有的Ant Design组件）
+
+---
+
+## 🎯 验收标准
+
+### 必须满足 ⭐
+1. ✅ **MICE功能完全实现**（非常重要！）
+2. ✅ **新列位置正确**（紧邻原列右侧）
+3. ✅ **原始数据完全保留**（填补不修改原列）
+4. ✅ 所有测试用例通过（特别是TC-7, TC-8, TC-14）
+5. ✅ 无Breaking Changes（原有删除功能不受影响）
+6. ✅ UI符合设计稿（3个Tab切换流畅）
+7. ✅ 代码通过Linter检查
+8. ✅ 添加适当的日志和错误处理
+9. ✅ MICE显示进度条或加载动画
+
+### 加分项
+1. ⭐ 性能优于预期
+2. ⭐ UI动画流畅
+3. ⭐ 错误提示友好且具体
+4. ⭐ 添加单元测试
+
+---
+
+## 📅 时间估算（已更新）
+
+| 阶段 | 预计时间 | 备注 |
+|------|----------|------|
+| Python后端 - 简单填补 | 40分钟 | fillna.py基础功能 |
+| Python后端 - MICE填补 ⭐ | 50分钟 | **必须实现**，包括sklearn集成 |
+| Python后端 - 新列插入逻辑 | 30分钟 | df.insert()实现，确保新列紧邻原列 |
+| Python - main.py端点 | 20分钟 | 新增3个API端点 |
+| Node.js后端 | 20分钟 | 简单转发 |
+| 前端UI - Tab结构 | 30分钟 | 3个Tab切换 |
+| 前端UI - Tab 2（简单填补） | 40分钟 | 表单 + 统计 + 新列名输入 |
+| 前端UI - Tab 3（MICE） ⭐ | 40分钟 | 多选列 + 参数 + 进度条 |
+| API集成 | 30分钟 | 前端调用后端，处理新列名 |
+| 测试 | 40分钟 | 14个测试用例，重点测试新列位置 |
+| 优化和文档 | 20分钟 | 错误处理 + 文档 |
+| **总计** | **约5-6小时** | **包含完整MICE实现** ⭐ |
+
+**说明**：
+- MICE是医学研究的核心需求，必须完整实现
+- 新列插入逻辑需要仔细处理，确保位置正确
+- 前端需要额外时间处理新列名输入和预览
+
+---
+
+## 📚 参考资料
+
+### 缺失值填补理论
+- [sklearn.impute.IterativeImputer文档](https://scikit-learn.org/stable/modules/generated/sklearn.impute.IterativeImputer.html)
+- [MICE算法原理](https://www.jstatsoft.org/article/view/v045i03)
+
+### 医学研究中的缺失值处理
+- 均值/中位数填补：最常用，简单快速
+- MICE：高质量研究首选，考虑变量间关系
+- 分组填补：不同人群特征差异大时使用
+
+---
+
+## ✅ 开发前确认清单
+
+已确认事项 ✅：
+
+- [x] **MICE功能必须开发**（医学研究核心需求）✅
+- [x] **填补方式：创建新列**（保留原始数据）✅
+- [x] **新列位置：紧邻原列**（便于对比验证）✅
+- [x] **无需撤销功能**（原始数据未被修改）✅
+- [x] UI设计符合预期（3个Tab切换）✅
+- [x] 功能范围合理（Phase 1不包括分组填补、插值等）✅
+- [x] 性能要求合理（MICE 10万行<60秒）✅
+- [x] 测试用例完整（14个测试用例）✅
+- [x] 时间估算可接受（约5-6小时）✅
+
+## 🚀 准备开始开发
+
+所有确认清单已完成，随时可以开始实施！
+
+---
+
+## 📝 更新记录
+
+### 2025-12-09 更新（根据用户确认）
+
+**核心变更**：
+1. ✅ **MICE功能必须实现**（医学研究核心需求）
+2. ✅ **填补方式改为创建新列**（保留原始数据）
+3. ✅ **新列位置：紧邻原列右侧**（便于对比验证）
+4. ✅ **取消撤销功能**（原始数据未被修改，无需撤销）
+
+**影响**：
+- Python函数签名新增 `new_column_name` 参数
+- UI新增"新列名"输入框
+- 实现逻辑使用 `df.insert()` 确保位置正确
+- 测试用例新增新列位置验证（TC-7, TC-8）
+- 开发时间从3小时增加到5-6小时（MICE+新列逻辑）
+
+**优势**：
+- ✅ 原始数据完全保留，数据安全性更高
+- ✅ 新旧数据并列显示，便于验证填补效果
+- ✅ 用户可多次尝试不同填补方法对比
+- ✅ 符合医学研究的严谨性要求
+
+---
+
+**已确认，准备开始开发！** 🚀
+
--- a/extraction_service/main.py
+++ b/extraction_service/main.py
@@ -97,6 +97,8 @@ class RecodeRequest(BaseModel):
    mapping: Dict[Any, Any]
    create_new_column: bool = True
    new_column_name: str = None
+    na_handling: str = 'keep'  # ✨ 新增：NA处理方式（keep/map/drop）
+    na_value: Any = None  # ✨ 新增：NA映射值

 class BinningRequest(BaseModel):
    """分箱请求模型"""
@@ -107,6 +109,9 @@ class BinningRequest(BaseModel):
    bins: List[Any] = None
    labels: List[Any] = None
    num_bins: int = 3
+    na_handling: str = 'keep'  # ✨ 新增：NA处理方式（keep/label/assign）
+    na_label: str = None  # ✨ 新增：NA标签
+    na_assign_to: int = None  # ✨ 新增：NA分配到的组索引

 class ConditionalRequest(BaseModel):
    """条件生成列请求模型"""
@@ -127,6 +132,7 @@ class ComputeRequest(BaseModel):
    data: List[Dict[str, Any]]
    new_column_name: str
    formula: str
+    column_mapping: List[Dict[str, str]] = []  # ✨ 新增：列名映射

 class PivotRequest(BaseModel):
    """Pivot请求模型"""
@@ -135,6 +141,11 @@ class PivotRequest(BaseModel):
    pivot_column: str
    value_columns: List[str]
    aggfunc: str = 'first'
+    column_mapping: List[Dict[str, str]] = []  # ✨ 列名映射
+    keep_unused_columns: bool = False  # ✨ 是否保留未选择的列
+    unused_agg_method: str = 'first'  # ✨ 未选择列的聚合方式（first/mode/mean）
+    original_column_order: List[str] = []  # ✨ 新增：原始列顺序
+    pivot_value_order: List[str] = []  # ✨ 新增：透视列值的原始顺序


 # ==================== API路由 ====================
@@ -763,13 +774,15 @@ async def operation_recode(request: RecodeRequest):
            # 转换为DataFrame
            df = pd.DataFrame(request.data)
            
-            # 调用预写函数
+            # 调用预写函数（传递NA处理参数）
            result_df = apply_recode(
                df,
                request.column,
                request.mapping,
                request.create_new_column,
-                request.new_column_name
+                request.new_column_name,
+                request.na_handling,  # ✨ NA处理方式
+                request.na_value  # ✨ NA映射值
            )
            
            # 转换回JSON（处理NaN和inf值）
@@ -840,7 +853,7 @@ async def operation_binning(request: BinningRequest):
            # 转换为DataFrame
            df = pd.DataFrame(request.data)
            
-            # 调用预写函数
+            # 调用预写函数（传递NA处理参数）
            result_df = apply_binning(
                df,
                request.column,
@@ -848,7 +861,10 @@ async def operation_binning(request: BinningRequest):
                request.new_column_name,
                request.bins,
                request.labels,
-                request.num_bins
+                request.num_bins,
+                request.na_handling,  # ✨ NA处理方式
+                request.na_label,  # ✨ NA标签
+                request.na_assign_to  # ✨ NA分配到的组索引
            )
            
            # 转换回JSON（处理Categorical类型、NaN值和inf值）
@@ -1106,11 +1122,12 @@ async def operation_compute(request: ComputeRequest):
            # 转换为DataFrame
            df = pd.DataFrame(request.data)
            
-            # 调用预写函数
+            # ✨ 调用预写函数（传递column_mapping）
            result_df = compute_column(
                df,
                request.new_column_name,
-                request.formula
+                request.formula,
+                request.column_mapping  # ✨ 传递列名映射
            )
            
            # 转换回JSON（处理NaN值和inf值）
@@ -1201,13 +1218,18 @@ async def operation_pivot(request: PivotRequest):
            # 转换为DataFrame
            df = pd.DataFrame(request.data)
            
-            # 调用预写函数
+            # ✨ 调用预写函数（传递column_mapping和未选择列处理参数）
            result_df = pivot_long_to_wide(
                df,
                request.index_column,
                request.pivot_column,
                request.value_columns,
-                request.aggfunc
+                request.aggfunc,
+                request.column_mapping,  # ✨ 传递列名映射
+                request.keep_unused_columns,  # ✨ 是否保留未选择的列
+                request.unused_agg_method,  # ✨ 未选择列的聚合方式
+                request.original_column_order,  # ✨ 原始列顺序
+                request.pivot_value_order  # ✨ 透视列值的原始顺序
            )
            
            # 转换回JSON（处理NaN和inf值）
--- a/extraction_service/operations/binning.py
+++ b/extraction_service/operations/binning.py
@@ -17,7 +17,10 @@ def apply_binning(
    new_column_name: str,
    bins: Optional[List[Union[int, float]]] = None,
    labels: Optional[List[Union[str, int]]] = None,
-    num_bins: int = 3
+    num_bins: int = 3,
+    na_handling: Literal['keep', 'label', 'assign'] = 'keep',
+    na_label: Optional[str] = None,
+    na_assign_to: Optional[int] = None
 ) -> pd.DataFrame:
    """
    应用分箱操作
@@ -33,16 +36,23 @@ def apply_binning(
        bins: 自定义切点列表（仅method='custom'时使用），如 [18, 60] → <18, 18-60, >60
        labels: 标签列表（可选）
        num_bins: 分组数量（仅method='equal_width'或'equal_freq'时使用）
+        na_handling: NA值处理方式
+            - 'keep': 保持为NA（默认）
+            - 'label': 标记为指定标签
+            - 'assign': 分配到指定组
+        na_label: 当na_handling='label'时，NA的标签（如"缺失"）
+        na_assign_to: 当na_handling='assign'时，NA分配到的组索引
    
    Returns:
        分箱后的数据框
    
    Examples:
-        >>> df = pd.DataFrame({'年龄': [15, 25, 35, 45, 55, 65, 75]})
+        >>> df = pd.DataFrame({'年龄': [15, 25, 35, 45, 55, 65, 75, None]})
        >>> result = apply_binning(df, '年龄', 'custom', '年龄分组', 
-        ...                        bins=[18, 60], labels=['青少年', '成年', '老年'])
+        ...                        bins=[18, 60], labels=['青少年', '成年', '老年'],
+        ...                        na_handling='label', na_label='缺失')
        >>> result['年龄分组'].tolist()
-        ['青少年', '成年', '成年', '成年', '成年', '老年', '老年']
+        ['青少年', '成年', '成年', '成年', '成年', '老年', '老年', '缺失']
    """
    if df.empty:
        return df
@@ -54,6 +64,10 @@ def apply_binning(
    # 创建结果数据框
    result = df.copy()
    
+    # ✨ 记录原始NA的位置（在分箱前）
+    original_na_mask = result[column].isna()
+    original_na_count = original_na_mask.sum()
+    
    # 验证并转换数据类型
    if not pd.api.types.is_numeric_dtype(result[column]):
        # 尝试将字符串转换为数值
@@ -136,6 +150,9 @@ def apply_binning(
    else:
        raise ValueError(f"不支持的分箱方法: {method}")
    
+    # ✨ 重要：将Categorical类型转换为object类型，避免"nan"字符串问题
+    result[new_column_name] = result[new_column_name].astype('object')
+    
    # ✨ 优化：将新列移到原列旁边
    original_col_index = result.columns.get_loc(column)
    cols = list(result.columns)
@@ -145,6 +162,27 @@ def apply_binning(
    cols.insert(original_col_index + 1, new_column_name)
    result = result[cols]
    
+    # ✨ 处理NA值（使用分箱前记录的NA位置）
+    if original_na_count > 0:
+        if na_handling == 'keep':
+            # 保持为NA（显式设置为None，避免显示为"nan"字符串）
+            result.loc[original_na_mask, new_column_name] = None
+            print(f'📊 NA处理：保持为NA（{original_na_count}个）', flush=True)
+        
+        elif na_handling == 'label':
+            # 标记为指定标签
+            label_to_use = na_label if na_label else '空值/NA'
+            result.loc[original_na_mask, new_column_name] = label_to_use
+            print(f'📊 NA处理：标记为 "{label_to_use}"（{original_na_count}个）', flush=True)
+        
+        elif na_handling == 'assign':
+            # 分配到指定组（通过labels）
+            if labels and na_assign_to is not None and 0 <= na_assign_to < len(labels):
+                result.loc[original_na_mask, new_column_name] = labels[na_assign_to]
+                print(f'📊 NA处理：分配到组 "{labels[na_assign_to]}"（{original_na_count}个）', flush=True)
+            else:
+                print(f'⚠️  警告：na_assign_to无效，NA保持为空', flush=True)
+    
    # 统计分布
    print(f'分箱结果分布:')
    value_counts = result[new_column_name].value_counts().sort_index()
--- a/extraction_service/operations/conditional.py
+++ b/extraction_service/operations/conditional.py
@@ -109,6 +109,10 @@ def apply_conditional_column(
                mask = col_data >= value
            elif operator == '<=':
                mask = col_data <= value
+            elif operator == 'is_null':  # ✨ 新增：为空
+                mask = result[column].isna()
+            elif operator == 'not_null':  # ✨ 新增：不为空
+                mask = result[column].notna()
            else:
                raise ValueError(f'不支持的运算符: {operator}')
            
--- a/extraction_service/operations/pivot.py
+++ b/extraction_service/operations/pivot.py
@@ -1,10 +1,12 @@
 """
 Pivot操作 - 预写函数
 长表转宽表（一人多行 → 一人一行）
+
+✨ 方案B实现：支持列名映射
 """

 import pandas as pd
-from typing import List, Literal, Optional
+from typing import List, Literal, Optional, Dict


 def pivot_long_to_wide(
@@ -12,7 +14,12 @@ def pivot_long_to_wide(
    index_column: str,
    pivot_column: str,
    value_columns: List[str],
-    aggfunc: Literal['first', 'last', 'mean', 'sum', 'min', 'max'] = 'first'
+    aggfunc: Literal['first', 'last', 'mean', 'sum', 'min', 'max'] = 'first',
+    column_mapping: Optional[List[Dict[str, str]]] = None,
+    keep_unused_columns: bool = False,
+    unused_agg_method: Literal['first', 'mode', 'mean'] = 'first',
+    original_column_order: Optional[List[str]] = None,
+    pivot_value_order: Optional[List[str]] = None
 ) -> pd.DataFrame:
    """
    长表转宽表（Pivot）
@@ -25,33 +32,39 @@ def pivot_long_to_wide(
        pivot_column: 透视列（将变成新列名的列，如 Event Name）
        value_columns: 值列（要转置的数据列，如 FMA得分, ADL得分）
        aggfunc: 聚合函数
-            - 'first': 取第一个值（推荐）
-            - 'last': 取最后一个值
-            - 'mean': 求平均值
-            - 'sum': 求和
-            - 'min': 取最小值
-            - 'max': 取最大值
+        column_mapping: 列名映射（可选）
+        keep_unused_columns: 是否保留未选择的列（默认False）
+        unused_agg_method: 未选择列的聚合方式（'first'=取第一个值, 'mode'=取众数, 'mean'=取均值）
+        original_column_order: 原始列顺序（用于保持列顺序一致）
+        pivot_value_order: 透视列值的原始顺序（用于保持透视值顺序一致）
    
    Returns:
        宽表数据框
-    
-    示例:
-        pivot_long_to_wide(
-            df,
-            index_column='Record ID',
-            pivot_column='Event Name',
-            value_columns=['FMA得分', 'ADL得分'],
-            aggfunc='first'
-        )
    """
    result = df.copy()
    
-    print(f'原始数据: {len(result)} 行 × {len(result.columns)} 列')
-    print(f'索引列: {index_column}')
-    print(f'透视列: {pivot_column}')
-    print(f'值列: {", ".join(value_columns)}')
-    print(f'聚合方式: {aggfunc}')
-    print('')
+    print(f'━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━', flush=True)
+    print(f'📊 Pivot转换', flush=True)
+    print(f'原始数据: {len(result)} 行 × {len(result.columns)} 列', flush=True)
+    print(f'索引列: {index_column}', flush=True)
+    print(f'透视列: {pivot_column}', flush=True)
+    print(f'值列: {", ".join(value_columns)}', flush=True)
+    print(f'聚合方式: {aggfunc}', flush=True)
+    
+    # ✨ 检测未选择的列
+    all_columns = set(result.columns)
+    used_columns = {index_column, pivot_column} | set(value_columns)
+    unused_columns = list(all_columns - used_columns)
+    
+    if unused_columns:
+        print(f'', flush=True)
+        print(f'📋 未选择的列（{len(unused_columns)}个）: {", ".join(unused_columns[:5])}{"..." if len(unused_columns) > 5 else ""}', flush=True)
+        if keep_unused_columns:
+            print(f'✓ 将保留未选择的列（聚合方式: {unused_agg_method}）', flush=True)
+        else:
+            print(f'⚠️  这些列将不会保留在结果中', flush=True)
+    
+    print('', flush=True)
    
    # 验证列是否存在
    required_cols = [index_column, pivot_column] + value_columns
@@ -61,67 +74,211 @@ def pivot_long_to_wide(
    
    # 检查索引列的唯一值数量
    unique_index = result[index_column].nunique()
-    print(f'唯一{index_column}数量: {unique_index}')
+    print(f'✓ 唯一{index_column}数量: {unique_index}', flush=True)
    
-    # 检查透视列的唯一值
-    unique_pivot = result[pivot_column].unique()
-    print(f'透视列"{pivot_column}"的唯一值: {list(unique_pivot)}')
-    print('')
+    # 检查透视列的唯一值（重要！）
+    unique_pivot_values = result[pivot_column].unique()
+    print(f'✓ 透视列"{pivot_column}"的唯一值: {list(unique_pivot_values)}', flush=True)
+    print(f'✓ 唯一值数量: {len(unique_pivot_values)}', flush=True)
+    
+    # ⚠️ 关键检查：如果唯一值只有1个，警告用户
+    if len(unique_pivot_values) == 1:
+        print(f'', flush=True)
+        print(f'⚠️  警告: 透视列只有1个唯一值！', flush=True)
+        print(f'   这意味着Pivot后只会生成1列（而不是多列）', flush=True)
+        print(f'   请检查：', flush=True)
+        print(f'   1. 透视列是否选择正确？', flush=True)
+        print(f'   2. 数据是否已经是宽表格式？', flush=True)
+        print(f'', flush=True)
+    
+    print('', flush=True)
    
    try:
-        # 执行Pivot转换
+        # ✅ 执行Pivot转换（dropna=False保留全NaN的列）
        df_pivot = result.pivot_table(
            index=index_column,
            columns=pivot_column,
            values=value_columns,
-            aggfunc=aggfunc
+            aggfunc=aggfunc,
+            dropna=False  # ✨ 关键：不删除全NaN的列，确保所有组合都生成
        )
        
-        # ✨ 增强：展平多级列名（处理特殊字符）
-        # 如果只有一个值列，列名是单层的
+        print(f'✓ Pivot执行成功', flush=True)
+        print(f'  Pivot后shape: {df_pivot.shape}', flush=True)
+        print(f'  列数: {len(df_pivot.columns)}', flush=True)
+        print(f'', flush=True)
+        
+        # ✨ 修复：更健壮的列名展平逻辑
        if len(value_columns) == 1:
-            # 清理列名中的特殊字符，使用安全的分隔符
-            value_col_clean = str(value_columns[0]).replace('(', '').replace(')', '').replace('=', '').strip()
-            df_pivot.columns = [f'{value_col_clean}___{str(col).replace(" ", "_")}' for col in df_pivot.columns]
-        else:
-            # 多个值列，列名是多层的，需要展平
-            # 使用三个下划线作为分隔符（避免与列名中的下划线冲突）
+            # 单个值列：列名是单层的 (pivot_value1, pivot_value2, ...)
+            print(f'📝 单值列模式：展平列名', flush=True)
+            
+            # 获取原始值列名（用于生成新列名）
+            value_col_name = value_columns[0]
+            
+            # 生成新列名：值列名___透视值
            new_columns = []
-            for col in df_pivot.columns.values:
-                if isinstance(col, tuple):
-                    # 清理每个部分的特殊字符
-                    parts = [str(c).replace('(', '').replace(')', '').replace('=', '').strip() for c in col]
-                    new_col = '___'.join(parts)
-                else:
-                    new_col = str(col).replace('(', '').replace(')', '').replace('=', '').strip()
-                new_columns.append(new_col)
+            for col in df_pivot.columns:
+                # col 是透视列的某个值（如 0, 1, 2）
+                new_col_name = f'{value_col_name}___{col}'
+                new_columns.append(new_col_name)
+                print(f'  生成列: {new_col_name}', flush=True)
+            
            df_pivot.columns = new_columns
            
+        else:
+            # 多个值列：列名是多层的 ((value_col1, pivot_val1), (value_col1, pivot_val2), ...)
+            print(f'📝 多值列模式：展平多级列名', flush=True)
+            
+            new_columns = []
+            for col in df_pivot.columns:
+                if isinstance(col, tuple):
+                    # 元组：(值列名, 透视值)
+                    value_name, pivot_value = col
+                    new_col_name = f'{value_name}___{pivot_value}'
+                    new_columns.append(new_col_name)
+                    print(f'  {col} → {new_col_name}', flush=True)
+                else:
+                    # 单个值（不应该出现，但防御性编程）
+                    new_columns.append(str(col))
+            
+            df_pivot.columns = new_columns
+        
+        print(f'', flush=True)
+        print(f'✓ 列名展平完成', flush=True)
+        print(f'', flush=True)
+        
        # 重置索引（将index列变回普通列）
        df_pivot = df_pivot.reset_index()
        
-        # ✨ 优化：保持原始行顺序（按照index_column排序）
-        # 获取原始数据中index_column的顺序
+        # ✨ 新功能：保留未选择的列
+        if keep_unused_columns and unused_columns:
+            print(f'', flush=True)
+            print(f'📦 正在处理未选择的列...', flush=True)
+            
+            # 对未选择的列进行聚合
+            if unused_agg_method == 'first':
+                # 取第一个非空值
+                unused_df = result.groupby(index_column)[unused_columns].first().reset_index()
+                print(f'✓ 聚合方式：取第一个值', flush=True)
+                
+            elif unused_agg_method == 'mode':
+                # Mode aggregation: return the first value of x.mode(), or None when mode() yields no values
+                def get_mode(x):
+                    mode_vals = x.mode()
+                    return mode_vals[0] if len(mode_vals) > 0 else None
+                
+                unused_df = result.groupby(index_column)[unused_columns].agg(get_mode).reset_index()
+                print(f'✓ 聚合方式：取众数', flush=True)
+                
+            elif unused_agg_method == 'mean':
+                # 取均值（区分数值列和非数值列）
+                numeric_cols = [col for col in unused_columns if pd.api.types.is_numeric_dtype(result[col])]
+                non_numeric_cols = [col for col in unused_columns if col not in numeric_cols]
+                
+                # 数值列取均值
+                if numeric_cols:
+                    numeric_df = result.groupby(index_column)[numeric_cols].mean()
+                else:
+                    numeric_df = pd.DataFrame(index=result[index_column].unique())
+                
+                # 非数值列取第一个值
+                if non_numeric_cols:
+                    non_numeric_df = result.groupby(index_column)[non_numeric_cols].first()
+                else:
+                    non_numeric_df = pd.DataFrame(index=result[index_column].unique())
+                
+                # 合并
+                unused_df = pd.concat([numeric_df, non_numeric_df], axis=1).reset_index()
+                print(f'✓ 聚合方式：数值列取均值，非数值列取第一个值', flush=True)
+            
+            else:
+                # 默认取第一个值
+                unused_df = result.groupby(index_column)[unused_columns].first().reset_index()
+            
+            # 合并到pivot结果中
+            df_pivot = df_pivot.merge(unused_df, on=index_column, how='left')
+            
+            print(f'✓ 已保留 {len(unused_columns)} 个未选择的列', flush=True)
+            for col in unused_columns[:5]:
+                print(f'  • {col}', flush=True)
+            if len(unused_columns) > 5:
+                print(f'  • ... 还有 {len(unused_columns) - 5} 列', flush=True)
+        
+        # ✨ 优化：保持原始行顺序
        original_order = result[index_column].drop_duplicates().tolist()
-        # 创建排序映射
        order_map = {val: idx for idx, val in enumerate(original_order)}
-        # 添加临时排序列
        df_pivot['_sort_order'] = df_pivot[index_column].map(order_map)
-        # 按原始顺序排序
        df_pivot = df_pivot.sort_values('_sort_order').drop(columns=['_sort_order']).reset_index(drop=True)
        
-        print(f'转换成功！')
-        print(f'结果: {len(df_pivot)} 行 × {len(df_pivot.columns)} 列')
-        print(f'新增列: {len(df_pivot.columns) - 1} 列')
-        print('')
+        # ✨ 新增：保持原始列顺序
+        if original_column_order:
+            print(f'', flush=True)
+            print(f'🔄 按原始列顺序重排列...', flush=True)
            
-        # 显示新列名
-        print(f'生成的列名:')
+            # ✅ 关键：一次遍历，逐列判断（转置列展开，未选择列保持）
+            final_cols = [index_column]
+            
+            for orig_col in original_column_order:
+                if orig_col == index_column or orig_col == pivot_column:
+                    continue  # 跳过索引列和透视列
+                
+                if orig_col in value_columns:
+                    # ✅ 这个列被选择转置 → 添加展开后的所有列
+                    related_cols = [c for c in df_pivot.columns if c.startswith(f'{orig_col}___')]
+                    
+                    # ✨ 按透视列的原始顺序排序（而不是字母顺序）
+                    if pivot_value_order:
+                        # 创建顺序映射
+                        pivot_order_map = {val: idx for idx, val in enumerate(pivot_value_order)}
+                        
+                        # 对related_cols按透视值顺序排序
+                        def get_pivot_value(col_name):
+                            # Extract the pivot value, e.g. "FMA___基线" -> "基线".
+                            # Strip the known "<orig_col>___" prefix instead of splitting on "___",
+                            # so value-column names or pivot values containing "___" still resolve.
+                            prefix = f'{orig_col}___'
+                            return col_name[len(prefix):] if col_name.startswith(prefix) else col_name
+                        
+                        related_cols_sorted = sorted(
+                            related_cols,
+                            key=lambda c: pivot_order_map.get(get_pivot_value(c), 999)
+                        )
+                    else:
+                        # 如果没有提供透视值顺序，保持现有顺序
+                        related_cols_sorted = sorted(related_cols)
+                    
+                    final_cols.extend(related_cols_sorted)
+                    print(f'  • {orig_col} → {len(related_cols_sorted)}个转置列', flush=True)
+                
+                elif keep_unused_columns and orig_col in df_pivot.columns:
+                    # ✅ 这个列未被选择 → 如果保留，直接添加
+                    final_cols.append(orig_col)
+                    print(f'  • {orig_col} → 保持不变', flush=True)
+            
+            # 添加任何剩余的列（防御性编程）
+            for col in df_pivot.columns:
+                if col not in final_cols:
+                    final_cols.append(col)
+                    print(f'  • {col} → 剩余列', flush=True)
+            
+            # 重排列
+            df_pivot = df_pivot[final_cols]
+            print(f'✓ 列顺序已按原始顺序重排（总计{len(final_cols)}列）', flush=True)
+        
+        print(f'✅ 转换成功！', flush=True)
+        print(f'📊 结果: {len(df_pivot)} 行 × {len(df_pivot.columns)} 列', flush=True)
+        print(f'📈 新增列: {len(df_pivot.columns) - 1} 列', flush=True)
+        print(f'', flush=True)
+        
+        # 显示所有新列名
+        print(f'📋 生成的列名:', flush=True)
        new_cols = [col for col in df_pivot.columns if col != index_column]
-        for i, col in enumerate(new_cols[:10], 1):  # 只显示前10个
-            print(f'  {i}. {col}')
-        if len(new_cols) > 10:
-            print(f'  ... 还有 {len(new_cols) - 10} 列')
+        for i, col in enumerate(new_cols, 1):
+            print(f'  {i}. {col}', flush=True)
+        
+        print(f'━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━', flush=True)
+        print(f'', flush=True)
        
        return df_pivot
        
@@ -180,5 +337,3 @@ def get_pivot_preview(
        'estimated_rows': int(unique_index),
        'estimated_columns': len(unique_pivot)
    }
-
-
--- a/extraction_service/operations/recode.py
+++ b/extraction_service/operations/recode.py
@@ -5,7 +5,8 @@
 """

 import pandas as pd
-from typing import Dict, Any, Optional
+import numpy as np
+from typing import Dict, Any, Optional, Literal


 def apply_recode(
@@ -13,7 +14,9 @@ def apply_recode(
    column: str,
    mapping: Dict[Any, Any],
    create_new_column: bool = True,
-    new_column_name: Optional[str] = None
+    new_column_name: Optional[str] = None,
+    na_handling: Literal['keep', 'map', 'drop'] = 'keep',
+    na_value: Any = None
 ) -> pd.DataFrame:
    """
    应用数值映射
@@ -24,16 +27,21 @@ def apply_recode(
        mapping: 映射字典，如 {'男': 1, '女': 2}
        create_new_column: 是否创建新列（True）或覆盖原列（False）
        new_column_name: 新列名（create_new_column=True时使用）
+        na_handling: NA值处理方式
+            - 'keep': 保持为NA（默认）
+            - 'map': 映射为指定值
+            - 'drop': 删除包含NA的行
+        na_value: 当na_handling='map'时，NA映射到的值
    
    Returns:
        重编码后的数据框
    
    Examples:
-        >>> df = pd.DataFrame({'性别': ['男', '女', '男', '女']})
+        >>> df = pd.DataFrame({'性别': ['男', '女', '男', None]})
        >>> mapping = {'男': 1, '女': 2}
-        >>> result = apply_recode(df, '性别', mapping, True, '性别_编码')
+        >>> result = apply_recode(df, '性别', mapping, True, '性别_编码', na_handling='map', na_value=0)
        >>> result['性别_编码'].tolist()
-        [1, 2, 1, 2]
+        [1, 2, 1, 0]
    """
    if df.empty:
        return df
@@ -54,6 +62,9 @@ def apply_recode(
    # 创建结果数据框（避免修改原数据）
    result = df.copy()
    
+    # ✨ 统计原始NA数量
+    original_na_count = result[column].isna().sum()
+    
    # ✨ 优化：如果是创建新列，插入到原列旁边
    if create_new_column:
        original_col_index = result.columns.get_loc(column)
@@ -62,6 +73,26 @@ def apply_recode(
        # 覆盖原列
        result[target_column] = result[column].map(mapping)
    
+    # ✨ 处理NA值
+    if original_na_count > 0:
+        na_mask = result[column].isna()
+        
+        if na_handling == 'keep':
+            # 保持为NA（已经是NA，无需操作）
+            print(f'📊 NA处理：保持为NA（{original_na_count}个）')
+        
+        elif na_handling == 'map':
+            # 映射为指定值
+            result.loc[na_mask, target_column] = na_value
+            print(f'📊 NA处理：映射为 {na_value}（{original_na_count}个）')
+        
+        elif na_handling == 'drop':
+            # 删除包含NA的行
+            rows_before = len(result)
+            result = result[~na_mask].copy()
+            rows_after = len(result)
+            print(f'📊 NA处理：删除包含NA的行（删除{rows_before - rows_after}行）')
+    
    # 统计结果
    mapped_count = result[target_column].notna().sum()
    unmapped_count = result[target_column].isna().sum()
--- a/force_commit.ps1
+++ b/force_commit.ps1
@@ -0,0 +1,85 @@
+# Force Git commit script.
+# Used to recover from a stuck rebase: aborts it, commits all pending changes, and force-pushes to origin/master.
+
+Write-Host "正在中止rebase..." -ForegroundColor Yellow
+# Best effort: stderr is discarded so the script continues even if the abort reports an error.
+git rebase --abort 2>$null
+
+Write-Host "检查当前状态..." -ForegroundColor Cyan
+git status
+
+Write-Host "`n准备提交..." -ForegroundColor Yellow
+
+# Stage every change, then unstage anything under backend/uploads/.
+git add -A
+git reset HEAD backend/uploads/ 2>$null
+
+# Commit
+$commitMessage = @"
+feat(dc/tool-c): Add pivot column ordering and NA handling features
+
+Major features:
+1. Pivot transformation enhancements:
+   - Add option to keep unselected columns with 3 aggregation methods
+   - Maintain original column order after pivot (aligned with source file)
+   - Preserve pivot value order (first appearance order)
+
+2. NA handling across 4 core functions:
+   - Recode: Support keep/map/drop for NA values
+   - Filter: Already supports is_null/not_null operators
+   - Binning: Support keep/label/assign for NA values (fix 'nan' display)
+   - Conditional: Add is_null/not_null operators
+
+3. UI improvements:
+   - Enable column header tooltips with custom header component
+   - Add closeable alert for 50-row preview
+   - Fix page scrollbar issues
+
+Modified files:
+Python:
+- operations/pivot.py: Add column ordering logic
+- operations/recode.py: Add NA handling parameters
+- operations/binning.py: Add NA handling + fix Categorical to object conversion
+- operations/conditional.py: Add is_null/not_null operators
+- main.py: Update all request models
+
+Backend:
+- SessionController.ts: Return NA count in unique values API
+- QuickActionController.ts: Extract and pass column/pivot orders
+- QuickActionService.ts: Forward all new parameters
+
+Frontend:
+- PivotDialog.tsx: Add keep unused columns UI
+- RecodeDialog.tsx: Add NA handling dropdown
+- BinningDialog.tsx: Add NA handling radio group
+- ConditionalDialog.tsx: Hide value input for is_null/not_null
+- DataGrid.tsx: Add custom header component for tooltips
+- index.tsx: Add closeable preview alert
+
+Documentation:
+- Add pivot column ordering summary
+- Add NA handling summary
+- Update missing value processing plan
+
+Status: Ready for testing
+"@
+
+git commit -m $commitMessage
+# Stop if the commit failed (for example, nothing to commit); force-pushing an
+# unchanged or unexpected HEAD could overwrite the remote branch for no reason.
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "`n提交失败 (exit code: $LASTEXITCODE)，已取消推送。" -ForegroundColor Red
+    exit $LASTEXITCODE
+}
+
+Write-Host "`n准备强制推送..." -ForegroundColor Yellow
+# --force-with-lease still rewrites the remote branch, but refuses when the remote
+# holds commits that the local remote-tracking ref has not seen.
+git push --force-with-lease origin master
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "`n推送失败 (exit code: $LASTEXITCODE)。" -ForegroundColor Red
+    exit $LASTEXITCODE
+}
+
+Write-Host "`n完成！" -ForegroundColor Green
+
--- a/frontend-v2/src/modules/dc/pages/tool-c/components/BinningDialog.tsx
+++ b/frontend-v2/src/modules/dc/pages/tool-c/components/BinningDialog.tsx
@@ -43,6 +43,11 @@ const BinningDialog: React.FC<BinningDialogProps> = ({
  
  const [loading, setLoading] = useState(false);
  
+  // ✨ NA处理
+  const [naHandling, setNaHandling] = useState<'keep' | 'label' | 'assign'>('keep');
+  const [naLabel, setNaLabel] = useState<string>('缺失');
+  const [naAssignTo, setNaAssignTo] = useState<number>(0);
+
  // 更新列选择
  const handleColumnChange = (value: string) => {
    setSelectedColumn(value);
@@ -118,6 +123,14 @@ const BinningDialog: React.FC<BinningDialogProps> = ({
      }
    }

+    // ✨ 添加NA处理参数
+    params.naHandling = naHandling;
+    if (naHandling === 'label') {
+      params.naLabel = naLabel;
+    } else if (naHandling === 'assign') {
+      params.naAssignTo = naAssignTo;
+    }
+
    setLoading(true);
    try {
      const response = await fetch('/api/v1/dc/tool-c/quick-action', {
@@ -328,6 +341,59 @@ const BinningDialog: React.FC<BinningDialogProps> = ({
                onChange={(e) => setNewColumnName(e.target.value)}
              />
            </div>
+
+            {/* ✨ NA处理区域 */}
+            <div className="bg-yellow-50 p-4 rounded-lg border border-yellow-200">
+              <div className="flex items-center gap-2 mb-3">
+                <span className="text-sm font-medium text-slate-700">⚠️ 空值/NA 处理</span>
+                <span className="text-xs text-slate-500">（原列中的空值如何处理）</span>
+              </div>
+              
+              <Radio.Group value={naHandling} onChange={(e) => setNaHandling(e.target.value)}>
+                <Space direction="vertical">
+                  <Radio value="keep">
+                    <span className="text-sm">保持为空（默认）</span>
+                  </Radio>
+                  <Radio value="label">
+                    <span className="text-sm">标记为指定标签</span>
+                    {naHandling === 'label' && (
+                      <Input
+                        placeholder="如：缺失、未知"
+                        value={naLabel}
+                        onChange={(e) => setNaLabel(e.target.value)}
+                        size="small"
+                        className="ml-2"
+                        style={{ width: 150 }}
+                      />
+                    )}
+                  </Radio>
+                  <Radio value="assign">
+                    <span className="text-sm">分配到指定组</span>
+                    {naHandling === 'assign' && (
+                      <Select
+                        value={naAssignTo}
+                        onChange={setNaAssignTo}
+                        size="small"
+                        className="ml-2"
+                        style={{ width: 150 }}
+                      >
+                        {(method === 'custom' 
+                          ? (customLabels.split(',').filter(l => l.trim()).length || 3)
+                          : numBins
+                        ) && Array.from({ length: method === 'custom' 
+                          ? (customLabels.split(',').filter(l => l.trim()).length || 3)
+                          : numBins
+                        }).map((_, i) => (
+                          <Select.Option key={i} value={i}>
+                            第 {i + 1} 组
+                          </Select.Option>
+                        ))}
+                      </Select>
+                    )}
+                  </Radio>
+                </Space>
+              </Radio.Group>
+            </div>
          </>
        )}

--- a/frontend-v2/src/modules/dc/pages/tool-c/components/ConditionalDialog.tsx
+++ b/frontend-v2/src/modules/dc/pages/tool-c/components/ConditionalDialog.tsx
@@ -51,6 +51,8 @@ const ConditionalDialog: React.FC<Props> = ({
    { label: '小于 (<)', value: '<' },
    { label: '大于等于 (>=)', value: '>=' },
    { label: '小于等于 (<=)', value: '<=' },
+    { label: '为空（空值/NA）', value: 'is_null' },  // ✨ 新增
+    { label: '不为空', value: 'not_null' },  // ✨ 新增
  ];

  // 添加规则
@@ -365,6 +367,8 @@ const ConditionalDialog: React.FC<Props> = ({
                            </Select.Option>
                          ))}
                        </Select>
+                        {/* ✨ 只在不是is_null/not_null时显示值输入框 */}
+                        {condition.operator !== 'is_null' && condition.operator !== 'not_null' && (
                          <Input
                            placeholder="值"
                            value={condition.value}
@@ -379,6 +383,7 @@ const ConditionalDialog: React.FC<Props> = ({
                            className="w-32"
                            size="small"
                          />
+                        )}
                        {rule.conditions.length > 1 && (
                          <Button
                            type="text"
--- a/frontend-v2/src/modules/dc/pages/tool-c/components/DataGrid.tsx
+++ b/frontend-v2/src/modules/dc/pages/tool-c/components/DataGrid.tsx
@@ -23,6 +23,19 @@ interface DataGridProps {
  onCellValueChanged?: (params: any) => void;
 }

+// Custom header component: renders props.displayName on one line with ellipsis truncation and exposes the full name through the native title tooltip.
+const CustomHeader = (props: any) => {
+  return (
+    <div 
+      className="ag-header-cell-label" 
+      title={props.displayName}
+      style={{ overflow: 'hidden', textOverflow: 'ellipsis', whiteSpace: 'nowrap' }}
+    >
+      {props.displayName}
+    </div>
+  );
+};
+
 const DataGrid: React.FC<DataGridProps> = ({ data, columns, onCellValueChanged }) => {
  // 防御性编程：确保 data 和 columns 始终是数组
  const safeData = data || [];
@@ -41,8 +54,13 @@ const DataGrid: React.FC<DataGridProps> = ({ data, columns, onCellValueChanged }
        // ✅ 修复：使用安全的field名（索引），通过valueGetter获取实际数据
        field: `col_${index}`,
        headerName: col.name,
-        // ✅ 优化：添加tooltip显示完整列名
+        // ✅ 优化：添加tooltip显示完整列名（双保险）
        headerTooltip: col.name,
+        // ✨ 使用自定义表头组件（确保tooltip一定显示）
+        headerComponent: CustomHeader,
+        headerComponentParams: {
+          displayName: col.name,
+        },
        // ✅ 关键修复：使用valueGetter直接从原始数据中获取值
        valueGetter: (params: any) => {
          return params.data?.[col.id];
@@ -93,7 +111,7 @@ const DataGrid: React.FC<DataGridProps> = ({ data, columns, onCellValueChanged }
  // 空状态
  if (safeData.length === 0) {
    return (
-      <div className="bg-white border-2 border-slate-200 shadow-lg rounded-2xl p-12 text-center h-full flex items-center justify-center">
+      <div className="bg-white border-2 border-slate-200 shadow-lg rounded-2xl p-12 text-center flex items-center justify-center" style={{ height: '100%' }}>
        <div className="text-slate-400 text-sm space-y-3">
          <p className="text-2xl">📊 暂无数据</p>
          <p className="text-base text-slate-500">请在右侧AI助手中上传CSV或Excel文件</p>
@@ -118,6 +136,8 @@ const DataGrid: React.FC<DataGridProps> = ({ data, columns, onCellValueChanged }
          domLayout="normal"
          suppressCellFocus={false}
          enableCellTextSelection={true}
+          // ✅ 启用浏览器原生tooltip（让headerTooltip生效）
+          enableBrowserTooltips={true}
          // ✅ 修复 AG Grid #239：使用 legacy 主题模式
          theme="legacy"
          // 性能优化
--- a/frontend-v2/src/modules/dc/pages/tool-c/components/PivotDialog.tsx
+++ b/frontend-v2/src/modules/dc/pages/tool-c/components/PivotDialog.tsx
@@ -23,6 +23,9 @@ const PivotDialog: React.FC<Props> = ({
  const [valueColumns, setValueColumns] = useState<string[]>([]);
  const [aggfunc, setAggfunc] = useState<'first' | 'last' | 'mean' | 'sum'>('first');
  const [loading, setLoading] = useState(false);
+  // ✨ 新增：未选择列的处理
+  const [keepUnusedColumns, setKeepUnusedColumns] = useState(false);
+  const [unusedAggMethod, setUnusedAggMethod] = useState<'first' | 'mode' | 'mean'>('first');
  
  // 重置状态
  useEffect(() => {
@@ -31,6 +34,8 @@ const PivotDialog: React.FC<Props> = ({
      setPivotColumn('');
      setValueColumns([]);
      setAggfunc('first');
+      setKeepUnusedColumns(false);  // ✨ 重置：不保留未选择的列
+      setUnusedAggMethod('first');  // ✨ 重置：默认取第一个值
    }
  }, [visible]);
  
@@ -71,6 +76,8 @@ const PivotDialog: React.FC<Props> = ({
            pivotColumn,
            valueColumns,
            aggfunc,
+            keepUnusedColumns,  // ✨ 新增：是否保留未选择的列
+            unusedAggMethod,  // ✨ 新增：未选择列的聚合方式
          },
        }),
      });
@@ -238,6 +245,53 @@ const PivotDialog: React.FC<Props> = ({
          </div>
        </div>
        
+        {/* ✨ 高级选项 */}
+        <div className="border-t pt-4">
+          <div className="flex items-center gap-2 mb-3">
+            <span className="text-sm font-medium text-slate-700">⚙️ 高级选项</span>
+          </div>
+          
+          <Checkbox
+            checked={keepUnusedColumns}
+            onChange={(e) => setKeepUnusedColumns(e.target.checked)}
+          >
+            <span className="text-sm font-medium">保留未选择的列</span>
+          </Checkbox>
+          
+          {keepUnusedColumns && (
+            <div className="ml-6 mt-3 p-3 bg-slate-50 rounded-lg border border-slate-200">
+              <label className="text-sm font-medium text-slate-700 mb-2 block">
+                聚合方式：
+              </label>
+              <Radio.Group
+                value={unusedAggMethod}
+                onChange={(e) => setUnusedAggMethod(e.target.value)}
+              >
+                <div className="space-y-2">
+                  <Radio value="first">
+                    <div className="ml-2">
+                      <span className="font-medium text-sm">取第一个值</span>
+                      <span className="text-xs text-slate-500 ml-2">（默认）</span>
+                    </div>
+                  </Radio>
+                  <Radio value="mode">
+                    <span className="ml-2 font-medium text-sm">取众数</span>
+                  </Radio>
+                  <Radio value="mean">
+                    <div className="ml-2">
+                      <span className="font-medium text-sm">取均值</span>
+                      <span className="text-xs text-slate-500 ml-2">（仅数值列）</span>
+                    </div>
+                  </Radio>
+                </div>
+              </Radio.Group>
+              <div className="text-xs text-slate-500 mt-2">
+                未选择的列将按此方式聚合，并保留在结果中
+              </div>
+            </div>
+          )}
+        </div>
+        
        {/* 警告 */}
        <Alert
          title="重要提示"
--- a/frontend-v2/src/modules/dc/pages/tool-c/components/RecodeDialog.tsx
+++ b/frontend-v2/src/modules/dc/pages/tool-c/components/RecodeDialog.tsx
@@ -40,6 +40,11 @@ const RecodeDialog: React.FC<RecodeDialogProps> = ({
  const [newColumnName, setNewColumnName] = useState('');
  const [loading, setLoading] = useState(false);
  const [extracting, setExtracting] = useState(false);
+  // ✨ NA处理
+  const [hasNA, setHasNA] = useState(false);
+  const [naCount, setNaCount] = useState(0);  // ✨ NA数量
+  const [naHandling, setNaHandling] = useState<'keep' | 'map' | 'drop'>('keep');
+  const [naValue, setNaValue] = useState<string>('');

  // 当选择列时，从后端获取唯一值
  useEffect(() => {
@@ -65,17 +70,38 @@ const RecodeDialog: React.FC<RecodeDialogProps> = ({
        }
        
        const unique = result.data.uniqueValues;
+        const naCountFromBackend = result.data.naCount || 0;  // ✨ 从后端获取NA数量
        
        setUniqueValues(unique);
        
-        // 初始化映射表
-        const initialMapping = unique.map((val: any) => ({
+        // ✨ 检测是否有NA值（后端用<空值/NA>标记）
+        const hasNAValue = unique.some((val: any) => 
+          val === null || 
+          val === undefined || 
+          val === '' || 
+          val === '<空值/NA>'  // ✨ 后端返回的特殊标记
+        );
+        setHasNA(hasNAValue);
+        setNaCount(naCountFromBackend);  // ✨ 保存NA数量
+        
+        // 初始化映射表（排除NA值，NA单独处理）
+        const nonNAValues = unique.filter((val: any) => 
+          val !== null && 
+          val !== undefined && 
+          val !== '' && 
+          val !== '<空值/NA>'  // ✨ 排除特殊标记
+        );
+        const initialMapping = nonNAValues.map((val: any) => ({
          originalValue: val,
          newValue: '',
        }));
        
        setMappingTable(initialMapping);
        
+        // 重置NA处理
+        setNaHandling('keep');
+        setNaValue('');
+        
        // 生成默认新列名
        setNewColumnName(`${selectedColumn}_编码`);
      } catch (error: any) {
@@ -172,6 +198,8 @@ const RecodeDialog: React.FC<RecodeDialogProps> = ({
            mapping,
            createNewColumn,
            newColumnName: createNewColumn ? newColumnName : undefined,
+            naHandling,  // ✨ NA处理方式
+            naValue: naHandling === 'map' ? naValue : undefined,  // ✨ NA映射值
          },
        }),
      });
@@ -239,7 +267,12 @@ const RecodeDialog: React.FC<RecodeDialogProps> = ({
                <div>
                  <div className="flex items-center justify-between mb-2">
                    <label className="text-sm font-medium text-slate-700">
-                      检测到 {uniqueValues.length} 个唯一值：
+                      检测到 {mappingTable.length} 个唯一值：
+                      {hasNA && (
+                        <span className="ml-2 text-xs text-yellow-600">
+                          （+空值/NA，见下方处理）
+                        </span>
+                      )}
                    </label>
                    <span className="text-xs text-slate-500">
                      💡 提示：可以批量设置（如：1,2,3...）
@@ -276,6 +309,46 @@ const RecodeDialog: React.FC<RecodeDialogProps> = ({
                    </div>
                  )}
                </div>
+
+                {/* ✨ NA处理区域 */}
+                {hasNA && (
+                  <div className="bg-yellow-50 p-3 rounded-lg border border-yellow-200">
+                    <div className="flex items-center gap-2 mb-2">
+                      <span className="text-sm font-medium text-slate-700">⚠️ 空值/NA 处理</span>
+                      <span className="text-xs text-slate-500">
+                        检测到 {naCount} 个空值
+                      </span>
+                    </div>
+                    <Select
+                      value={naHandling}
+                      onChange={setNaHandling}
+                      style={{ width: '100%' }}
+                      size="small"
+                    >
+                      <Select.Option value="keep">
+                        <span className="text-sm">保持为NA（默认）</span>
+                      </Select.Option>
+                      <Select.Option value="map">
+                        <span className="text-sm">映射为指定值</span>
+                      </Select.Option>
+                      <Select.Option value="drop">
+                        <span className="text-sm text-red-600">删除包含NA的行</span>
+                      </Select.Option>
+                    </Select>
+                    
+                    {naHandling === 'map' && (
+                      <div className="mt-2">
+                        <Input
+                          placeholder="输入NA映射的值（如：9, 未知）"
+                          value={naValue}
+                          onChange={(e) => setNaValue(e.target.value)}
+                          size="small"
+                          prefix={<span className="text-xs text-slate-500">NA → </span>}
+                        />
+                      </div>
+                    )}
+                  </div>
+                )}
              </>
            )}
          </>
--- a/frontend-v2/src/modules/dc/pages/tool-c/index.tsx
+++ b/frontend-v2/src/modules/dc/pages/tool-c/index.tsx
@@ -36,6 +36,7 @@ interface ToolCState {
  // UI状态
  isLoading: boolean;
  isSidebarOpen: boolean;
+  isAlertClosed: boolean; // ✨ 新增：提示条关闭状态
  
  // ✨ 功能按钮对话框状态
  filterDialogVisible: boolean;
@@ -69,6 +70,7 @@ const ToolC = () => {
    messages: [],
    isLoading: false,
    isSidebarOpen: true,
+    isAlertClosed: false, // ✨ 初始状态：未关闭
    filterDialogVisible: false,
    recodeDialogVisible: false,
    binningDialogVisible: false,
@@ -228,7 +230,7 @@ const ToolC = () => {

  // ==================== 渲染 ====================
  return (
-    <div className="h-screen w-screen flex flex-col bg-gradient-to-br from-slate-50 to-slate-100 overflow-hidden">
+    <div className="h-screen w-screen flex flex-col bg-gradient-to-br from-slate-50 to-slate-100">
      {/* 顶部栏 */}
      <Header 
        fileName={state.fileName || '未上传文件'} 
@@ -237,10 +239,10 @@ const ToolC = () => {
        onToggleSidebar={() => updateState({ isSidebarOpen: !state.isSidebarOpen })}
      />
      
-      {/* 主工作区 */}
-      <div className="flex-1 flex overflow-hidden">
-        {/* 左侧：表格区域 */}
-        <div className="flex-1 flex flex-col min-w-0 overflow-hidden">
+      {/* Main workspace - overflow-hidden removed so child elements handle their own scrolling */}
+      <div className="flex-1 flex min-h-0">
+        {/* Left: table area - meant to scroll on its own */}
+        <div className="flex-1 flex flex-col min-w-0">
          <Toolbar 
            sessionId={state.sessionId}
            onFilterClick={() => updateState({ filterDialogVisible: true })}
@@ -251,23 +253,34 @@ const ToolC = () => {
            onComputeClick={() => updateState({ computeDialogVisible: true })}
            onPivotClick={() => updateState({ pivotDialogVisible: true })}
          />
-          <div className="flex-1 p-4 overflow-hidden flex flex-col">
-            {/* ✨ 优化：提示只显示前50行 */}
-            {state.data.length > 0 && (
-              <div className="mb-2 px-3 py-2 bg-blue-50 border border-blue-200 rounded-lg flex items-center gap-2 text-sm">
+          <div className="flex-1 p-4 flex flex-col min-h-0">
+            {/* ✨ Improvement: notice that only the first 50 rows are shown (dismissible) */}
+            {state.data.length > 0 && !state.isAlertClosed && (
+              <div className="mb-2 px-3 py-2 bg-blue-50 border border-blue-200 rounded-lg flex items-center justify-between gap-2 text-sm">
+                <div className="flex items-center gap-2">
                  <span className="text-blue-600">ℹ️</span>
                  <span className="text-blue-700">
                    <strong>提示：</strong>表格仅展示前 <strong>50行</strong> 数据预览，导出功能将包含 <strong>全部</strong> 处理结果
                  </span>
                </div>
+                <button
+                  onClick={() => updateState({ isAlertClosed: true })}
+                  className="text-blue-400 hover:text-blue-600 transition-colors p-1 rounded hover:bg-blue-100"
+                  title="关闭提示"
+                >
+                  <svg width="16" height="16" viewBox="0 0 16 16" fill="currentColor">
+                    <path d="M4.646 4.646a.5.5 0 0 1 .708 0L8 7.293l2.646-2.647a.5.5 0 0 1 .708.708L8.707 8l2.647 2.646a.5.5 0 0 1-.708.708L8 8.707l-2.646 2.647a.5.5 0 0 1-.708-.708L7.293 8 4.646 5.354a.5.5 0 0 1 0-.708z"/>
+                  </svg>
+                </button>
+              </div>
            )}
-            <div className="flex-1 overflow-hidden">
+            <div className="flex-1 min-h-0">
              <DataGrid data={state.data} columns={state.columns} />
            </div>
          </div>
        </div>

-        {/* 右侧：AI 数据清洗助手 */}
+        {/* Right: AI data-cleaning assistant - meant to scroll on its own */}
        {state.isSidebarOpen && (
          <Sidebar
            isOpen={state.isSidebarOpen}