fix(dc/tool-c): Fix special character handling and improve UX

Major fixes:
- Fix pivot transformation with special characters in column names
- Fix compute column validation for Chinese punctuation
- Fix recode dialog to fetch unique values from full dataset via new API
- Add column mapping mechanism to handle special characters

Database migration:
- Add column_mapping field to dc_tool_c_sessions table
- Migration file: 20251208_add_column_mapping

UX improvements:
- Darken table grid lines for better visibility
- Reduce column width by 40% with tooltip support
- Insert new columns next to source columns
- Preserve original row order after operations
- Add notice about 50-row preview limit

Modified files:
- Backend: SessionService, SessionController, QuickActionService, routes
- Python: pivot.py, compute.py, recode.py, binning.py, conditional.py
- Frontend: DataGrid, RecodeDialog, index.tsx, ag-grid-custom.css
- Database: schema.prisma, migration SQL

Status: Code complete, database migrated, ready for testing
2025-12-08 23:20:55 +08:00
parent f729699510
commit 91cab452d1
90 changed files with 735 additions and 45 deletions
--- a/extraction_service/operations/pivot.py
+++ b/extraction_service/operations/pivot.py
@@ -77,17 +77,39 @@ def pivot_long_to_wide(
            aggfunc=aggfunc
        )
        
-        # 展平多级列名
+        # ✨ 增强：展平多级列名（处理特殊字符）
        # 如果只有一个值列，列名是单层的
        if len(value_columns) == 1:
-            df_pivot.columns = [f'{value_columns[0]}_{col}' for col in df_pivot.columns]
+            # 清理列名中的特殊字符，使用安全的分隔符
+            value_col_clean = str(value_columns[0]).replace('(', '').replace(')', '').replace('=', '').strip()
+            df_pivot.columns = [f'{value_col_clean}___{str(col).replace(" ", "_")}' for col in df_pivot.columns]
        else:
            # 多个值列，列名是多层的，需要展平
-            df_pivot.columns = ['_'.join(str(c) for c in col).strip() for col in df_pivot.columns.values]
+            # 使用三个下划线作为分隔符（避免与列名中的下划线冲突）
+            new_columns = []
+            for col in df_pivot.columns.values:
+                if isinstance(col, tuple):
+                    # 清理每个部分的特殊字符
+                    parts = [str(c).replace('(', '').replace(')', '').replace('=', '').strip() for c in col]
+                    new_col = '___'.join(parts)
+                else:
+                    new_col = str(col).replace('(', '').replace(')', '').replace('=', '').strip()
+                new_columns.append(new_col)
+            df_pivot.columns = new_columns
        
        # 重置索引（将index列变回普通列）
        df_pivot = df_pivot.reset_index()
        
+        # ✨ 优化：保持原始行顺序（按照index_column排序）
+        # 获取原始数据中index_column的顺序
+        original_order = result[index_column].drop_duplicates().tolist()
+        # 创建排序映射
+        order_map = {val: idx for idx, val in enumerate(original_order)}
+        # 添加临时排序列
+        df_pivot['_sort_order'] = df_pivot[index_column].map(order_map)
+        # 按原始顺序排序
+        df_pivot = df_pivot.sort_values('_sort_order').drop(columns=['_sort_order']).reset_index(drop=True)
+        
        print(f'转换成功！')
        print(f'结果: {len(df_pivot)} 行 × {len(df_pivot.columns)} 列')
        print(f'新增列: {len(df_pivot.columns) - 1} 列')
@@ -159,3 +181,4 @@ def get_pivot_preview(
        'estimated_columns': len(unique_pivot)
    }

+