fix(dc/tool-c): Fix special character handling and improve UX
Major fixes:
- Fix pivot transformation with special characters in column names
- Fix compute column validation for Chinese punctuation
- Fix recode dialog to fetch unique values from full dataset via new API
- Add column mapping mechanism to handle special characters

Database migration:
- Add column_mapping field to dc_tool_c_sessions table
- Migration file: 20251208_add_column_mapping

UX improvements:
- Darken table grid lines for better visibility
- Reduce column width by 40% with tooltip support
- Insert new columns next to source columns
- Preserve original row order after operations
- Add notice about 50-row preview limit

Modified files:
- Backend: SessionService, SessionController, QuickActionService, routes
- Python: pivot.py, compute.py, recode.py, binning.py, conditional.py
- Frontend: DataGrid, RecodeDialog, index.tsx, ag-grid-custom.css
- Database: schema.prisma, migration SQL

Status: Code complete, database migrated, ready for testing
This commit is contained in:
@@ -59,10 +59,13 @@ def validate_formula(formula: str, available_columns: list) -> tuple[bool, str]:
|
||||
if re.search(pattern, formula, re.IGNORECASE):
|
||||
return False, f'公式包含不允许的操作: {pattern}'
|
||||
|
||||
# 检查是否只包含允许的字符
|
||||
allowed_chars = r'[a-zA-Z0-9_\u4e00-\u9fa5\s\+\-\*/\(\)\.,\*\*]'
|
||||
# ✨ 增强:检查是否只包含允许的字符(放宽限制,支持更多特殊字符)
|
||||
# 允许:英文字母、数字、下划线、中文、空格、运算符、括号(中英文)、逗号、点、冒号、等号
|
||||
allowed_chars = r'[a-zA-Z0-9_\u4e00-\u9fa5\s\+\-\*/\(\)\[\]\{\}\.,:\*\*=()【】、。:;!?]'
|
||||
if not re.match(f'^{allowed_chars}+$', formula):
|
||||
return False, '公式包含不允许的字符'
|
||||
# 找出不允许的字符
|
||||
invalid_chars = set(re.findall(f'[^{allowed_chars}]', formula))
|
||||
return False, f'公式包含不允许的字符: {", ".join(invalid_chars)}'
|
||||
|
||||
return True, ''
|
||||
|
||||
@@ -110,21 +113,41 @@ def compute_column(
|
||||
# 准备执行环境
|
||||
# 1. 添加数据框的列作为变量(自动转换数值类型)
|
||||
env = {}
|
||||
for col in result.columns:
|
||||
|
||||
# ✨ 增强:处理列名中的特殊字符
|
||||
# 创建列名映射:将公式中的列名替换为安全的变量名
|
||||
col_mapping = {}
|
||||
formula_safe = formula
|
||||
|
||||
for i, col in enumerate(result.columns):
|
||||
# 为每个列创建一个安全的变量名
|
||||
safe_var = f'col_{i}'
|
||||
col_mapping[col] = safe_var
|
||||
|
||||
# 在公式中替换列名(完整匹配,避免部分替换)
|
||||
# 使用正则表达式确保只替换完整的列名
|
||||
import re
|
||||
# 转义列名中的特殊字符
|
||||
col_escaped = re.escape(col)
|
||||
# 替换公式中的列名(前后必须是边界)
|
||||
formula_safe = re.sub(rf'\b{col_escaped}\b', safe_var, formula_safe)
|
||||
|
||||
# 尝试将列转换为数值类型
|
||||
try:
|
||||
# 如果列可以转换为数值,就转换
|
||||
numeric_col = pd.to_numeric(result[col], errors='coerce')
|
||||
# 如果转换后不全是NaN,说明是数值列
|
||||
if not numeric_col.isna().all():
|
||||
env[col] = numeric_col
|
||||
print(f' 列 "{col}" 自动转换为数值类型')
|
||||
env[safe_var] = numeric_col
|
||||
print(f' 列 "{col}" -> {safe_var} (数值类型)')
|
||||
else:
|
||||
# 否则保持原样
|
||||
env[col] = result[col]
|
||||
env[safe_var] = result[col]
|
||||
print(f' 列 "{col}" -> {safe_var}')
|
||||
except Exception:
|
||||
# 转换失败,保持原样
|
||||
env[col] = result[col]
|
||||
env[safe_var] = result[col]
|
||||
print(f' 列 "{col}" -> {safe_var}')
|
||||
|
||||
# 2. 添加允许的函数
|
||||
env.update(ALLOWED_FUNCTIONS)
|
||||
@@ -132,11 +155,30 @@ def compute_column(
|
||||
# 3. 添加numpy(用于数学运算)
|
||||
env['np'] = np
|
||||
|
||||
print(f' 使用安全公式: {formula_safe}')
|
||||
print('')
|
||||
|
||||
try:
|
||||
# 执行公式计算
|
||||
result[new_column_name] = eval(formula, {"__builtins__": {}}, env)
|
||||
# ✨ 使用转换后的安全公式执行计算
|
||||
computed_values = eval(formula_safe, {"__builtins__": {}}, env)
|
||||
|
||||
print(f'计算成功!')
|
||||
# ✨ 优化:将新列插入到第一个引用列的旁边
|
||||
# 找到公式中引用的第一个列
|
||||
first_ref_col = None
|
||||
for col in result.columns:
|
||||
safe_var = col_mapping.get(col)
|
||||
if safe_var and safe_var in formula_safe:
|
||||
first_ref_col = col
|
||||
break
|
||||
|
||||
if first_ref_col:
|
||||
ref_col_index = result.columns.get_loc(first_ref_col)
|
||||
result.insert(ref_col_index + 1, new_column_name, computed_values)
|
||||
print(f'计算成功!新列插入在 {first_ref_col} 旁边')
|
||||
else:
|
||||
# 如果找不到引用列,添加到最后
|
||||
result[new_column_name] = computed_values
|
||||
print(f'计算成功!')
|
||||
print(f'新列类型: {result[new_column_name].dtype}')
|
||||
print(f'新列前5个值:')
|
||||
# 安全打印(避免NaN/inf导致序列化错误)
|
||||
|
||||
Reference in New Issue
Block a user