Major features: 1. Missing value imputation (6 simple methods + MICE): - Mean/Median/Mode/Constant imputation - Forward fill (ffill) and Backward fill (bfill) for time series - MICE multivariate imputation (in progress, shape issue to fix) 2. Auto precision detection: - Automatically match decimal places of original data - Prevent false precision (e.g. 13.57 instead of 13.566716417910449) 3. Categorical variable detection: - Auto-detect and skip categorical columns in MICE - Show warnings for unsuitable columns - Suggest mode imputation for categorical data 4. UI improvements: - Rename button: "Delete Missing" to "Missing Value Handling" - Remove standalone "Dedup" and "MICE" buttons - 3-tab dialog: Delete / Fill / Advanced Fill - Display column statistics and recommended methods - Extended warning messages (8 seconds for skipped columns) 5. Bug fixes: - Fix sessionService.updateSessionData -> saveProcessedData - Fix OperationResult interface (add message and stats) - Fix Toolbar button labels and removal Modified files: Python: operations/fillna.py (new, 556 lines), main.py (3 new endpoints) Backend: QuickActionService.ts, QuickActionController.ts, routes/index.ts Frontend: MissingValueDialog.tsx (new, 437 lines), Toolbar.tsx, index.tsx Tests: test_fillna_operations.py (774 lines), test scripts and docs Docs: 5 documentation files updated Known issues: - MICE imputation has DataFrame shape mismatch issue (under debugging) - Workaround: Use 6 simple imputation methods first Status: Development complete, MICE debugging in progress Lines added: ~2000 lines across 3 tiers
191 lines
5.8 KiB
JavaScript
191 lines
5.8 KiB
JavaScript
/**
 * Recover code history from Cursor's SQLite database.
 *
 * Usage:
 * 1. cd backend
 * 2. npm install better-sqlite3
 * 3. node recover-code-from-cursor-db.js
 */

const fs = require('fs');
const os = require('os');
const path = require('path');

const Database = require('better-sqlite3');

// Cursor SQLite数据库路径
|
||
const DB_PATH = path.join(
|
||
process.env.APPDATA || process.env.HOME,
|
||
'Cursor/User/workspaceStorage/d5e3431d02cbaa0109f69d72300733da/state.vscdb'
|
||
);
|
||
|
||
// 输出目录
|
||
const OUTPUT_DIR = path.join(__dirname, 'cursor-history-recovery');
|
||
|
||
console.log('🔍 正在读取Cursor历史数据库...');
|
||
console.log('📂 数据库路径:', DB_PATH);
|
||
|
||
if (!fs.existsSync(DB_PATH)) {
|
||
console.error('❌ 数据库文件不存在!');
|
||
process.exit(1);
|
||
}
|
||
|
||
// 创建输出目录
|
||
if (!fs.existsSync(OUTPUT_DIR)) {
|
||
fs.mkdirSync(OUTPUT_DIR, { recursive: true });
|
||
}
|
||
|
||
try {
|
||
// 打开数据库(只读模式)
|
||
const db = new Database(DB_PATH, { readonly: true, fileMustExist: true });
|
||
|
||
console.log('✅ 数据库打开成功!');
|
||
|
||
// 1. 查看表结构
|
||
console.log('\n📊 数据库表列表:');
|
||
const tables = db.prepare("SELECT name FROM sqlite_master WHERE type='table'").all();
|
||
console.log(tables.map(t => ` - ${t.name}`).join('\n'));
|
||
|
||
// 2. 查询ItemTable表结构
|
||
if (tables.some(t => t.name === 'ItemTable')) {
|
||
console.log('\n📋 ItemTable 表结构:');
|
||
const columns = db.prepare("PRAGMA table_info(ItemTable)").all();
|
||
console.log(columns.map(c => ` - ${c.name} (${c.type})`).join('\n'));
|
||
|
||
// 3. 查询所有key(了解有哪些类型的数据)
|
||
console.log('\n🔑 ItemTable 中的所有key类型:');
|
||
const keys = db.prepare("SELECT DISTINCT key FROM ItemTable").all();
|
||
console.log(keys.map(k => ` - ${k.key}`).join('\n'));
|
||
|
||
// 4. 查找聊天历史相关的key
|
||
console.log('\n💬 查找聊天/Composer历史记录...');
|
||
const chatKeys = [
|
||
'workbench.panel.chat',
|
||
'composer',
|
||
'chat',
|
||
'workbench.panel.aichat',
|
||
'aiPanel'
|
||
];
|
||
|
||
let foundCount = 0;
|
||
|
||
for (const keyPattern of chatKeys) {
|
||
const rows = db.prepare(
|
||
`SELECT key, value FROM ItemTable WHERE key LIKE ?`
|
||
).all(`%${keyPattern}%`);
|
||
|
||
if (rows.length > 0) {
|
||
console.log(`\n✅ 找到 ${rows.length} 条与 "${keyPattern}" 相关的记录`);
|
||
|
||
rows.forEach((row, index) => {
|
||
foundCount++;
|
||
const filename = `${keyPattern.replace(/[^a-z0-9]/gi, '_')}_${index + 1}.json`;
|
||
const filepath = path.join(OUTPUT_DIR, filename);
|
||
|
||
// 保存原始JSON
|
||
fs.writeFileSync(filepath, row.value);
|
||
console.log(` 📄 已保存: ${filename} (${(row.value.length / 1024).toFixed(2)} KB)`);
|
||
|
||
// 尝试解析JSON并提取代码
|
||
try {
|
||
const data = JSON.parse(row.value);
|
||
|
||
// 提取可能的代码片段
|
||
const codeBlocks = extractCodeBlocks(data);
|
||
if (codeBlocks.length > 0) {
|
||
const codeFilename = `${keyPattern.replace(/[^a-z0-9]/gi, '_')}_${index + 1}_code.txt`;
|
||
const codeFilepath = path.join(OUTPUT_DIR, codeFilename);
|
||
fs.writeFileSync(codeFilepath, codeBlocks.join('\n\n' + '='.repeat(80) + '\n\n'));
|
||
console.log(` 📝 提取了 ${codeBlocks.length} 个代码块: ${codeFilename}`);
|
||
}
|
||
} catch (err) {
|
||
console.log(` ⚠️ JSON解析失败: ${err.message}`);
|
||
}
|
||
});
|
||
}
|
||
}
|
||
|
||
if (foundCount === 0) {
|
||
console.log('\n⚠️ 未找到聊天历史记录,尝试提取所有数据...');
|
||
|
||
// 导出所有ItemTable数据
|
||
const allRows = db.prepare("SELECT key, value FROM ItemTable").all();
|
||
console.log(`\n📦 共有 ${allRows.length} 条记录,正在导出...`);
|
||
|
||
const allDataFile = path.join(OUTPUT_DIR, 'all_itemtable_data.json');
|
||
fs.writeFileSync(allDataFile, JSON.stringify(allRows, null, 2));
|
||
console.log(`✅ 已导出所有数据到: all_itemtable_data.json (${(fs.statSync(allDataFile).size / 1024 / 1024).toFixed(2)} MB)`);
|
||
}
|
||
|
||
} else {
|
||
console.log('\n❌ ItemTable 表不存在!');
|
||
}
|
||
|
||
db.close();
|
||
console.log(`\n✅ 恢复完成!所有文件保存在: ${OUTPUT_DIR}`);
|
||
console.log('\n💡 下一步:');
|
||
console.log(' 1. 检查 cursor-history-recovery 文件夹');
|
||
console.log(' 2. 打开 .json 文件查找DC模块相关的代码');
|
||
console.log(' 3. 查找关键词:DualModelExtractionService, HealthCheckService, ExtractionController');
|
||
|
||
} catch (error) {
|
||
console.error('❌ 错误:', error.message);
|
||
console.error(error.stack);
|
||
process.exit(1);
|
||
}
|
||
|
||
/**
|
||
* 从JSON数据中递归提取代码块
|
||
*/
|
||
function extractCodeBlocks(obj, blocks = []) {
|
||
if (typeof obj === 'string') {
|
||
// 查找代码块模式
|
||
const codePatterns = [
|
||
/```[\s\S]*?```/g, // Markdown代码块
|
||
/export\s+(const|function|class)\s+\w+/g, // TypeScript导出
|
||
/interface\s+\w+/g, // TypeScript接口
|
||
/async\s+function\s+\w+/g, // 异步函数
|
||
];
|
||
|
||
codePatterns.forEach(pattern => {
|
||
const matches = obj.match(pattern);
|
||
if (matches) {
|
||
blocks.push(...matches);
|
||
}
|
||
});
|
||
|
||
// 如果包含关键代码关键词,保存整段
|
||
const keywords = [
|
||
'DualModelExtractionService',
|
||
'HealthCheckService',
|
||
'TemplateService',
|
||
'ConflictDetectionService',
|
||
'ExtractionController',
|
||
'dc_extraction_tasks',
|
||
'dc_health_checks'
|
||
];
|
||
|
||
if (keywords.some(kw => obj.includes(kw))) {
|
||
blocks.push(obj);
|
||
}
|
||
} else if (Array.isArray(obj)) {
|
||
obj.forEach(item => extractCodeBlocks(item, blocks));
|
||
} else if (obj && typeof obj === 'object') {
|
||
Object.values(obj).forEach(value => extractCodeBlocks(value, blocks));
|
||
}
|
||
|
||
return blocks;
|
||
}
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|