#!/usr/bin/env python3
"""memory-md-archive: archive old content out of MEMORY.md to keep it small.

Daily events older than DAYS_TO_KEEP days, overflow important/learning events
(beyond 30 entries), and low-importance entries (when the file exceeds the
4KB limit) are moved into per-month markdown files under ARCHIVE_DIR.
"""

import os
import re
import sys
import argparse
import shutil
from datetime import datetime, timedelta
from pathlib import Path

# --- Configuration ---
WORKSPACE_DIR = Path.home() / ".openclaw" / "workspace"
MEMORY_FILE = WORKSPACE_DIR / "MEMORY.md"
ARCHIVE_DIR = WORKSPACE_DIR / "memory"
DAILY_ARCHIVE_DIR = ARCHIVE_DIR / "archive-daily"
MAJOR_ARCHIVE_DIR = ARCHIVE_DIR / "archive-major"
LEARNING_ARCHIVE_DIR = ARCHIVE_DIR / "archive-learning"

MAX_SIZE_BYTES = 4 * 1024   # 4KB size limit for MEMORY.md
DAYS_TO_KEEP = 7            # daily events younger than this stay in MEMORY.md
MAX_MAJOR_EVENTS = 30
MAX_LEARNING_EVENTS = 30

# Keyword weights used to score how important an event line is.
IMPORTANCE_KEYWORDS = {
    'high': ['架构', '重构', '决策', '配置变更', '重要', '关键', '核心', '基础'],
    'medium': ['升级', '优化', '改进', '调整', '更新', '修复'],
    'low': ['测试', '查看', '查询', '搜索', '临时']
}


def _is_event_line(stripped):
    """True for a '-' event bullet; excludes '---' horizontal rules.

    The generated files use '---' as section separators; treating them as
    events would accumulate junk '---' entries across archive cycles.
    """
    return stripped.startswith('-') and not stripped.startswith('---')


def ensure_dirs():
    """Create all archive directories if they do not exist yet."""
    DAILY_ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
    MAJOR_ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
    LEARNING_ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)


def read_memory_file():
    """Return the contents of MEMORY.md, or '' if the file does not exist."""
    if not MEMORY_FILE.exists():
        return ""
    return MEMORY_FILE.read_text(encoding='utf-8')


def write_memory_file(content):
    """Write *content* to MEMORY.md, creating parent directories as needed."""
    MEMORY_FILE.parent.mkdir(parents=True, exist_ok=True)
    MEMORY_FILE.write_text(content, encoding='utf-8')


def get_file_size():
    """Return the size of MEMORY.md in bytes (0 if the file is missing)."""
    if not MEMORY_FILE.exists():
        return 0
    return MEMORY_FILE.stat().st_size


def parse_date_from_line(line):
    """Extract the first YYYY-MM-DD date from *line*.

    Returns a datetime.date, or None when no valid date is present.
    """
    match = re.search(r'(\d{4}-\d{2}-\d{2})', line)
    if match:
        try:
            return datetime.strptime(match.group(1), '%Y-%m-%d').date()
        except ValueError:
            # Matches the pattern but is not a real calendar date.
            return None
    return None


def extract_month_from_date(date_obj):
    """Return the 'YYYY-MM' month key for a date object."""
    return date_obj.strftime('%Y-%m')


def get_importance_score(line):
    """Score an event line from 0 to 100 using weighted keywords (base 50)."""
    score = 50
    for keyword in IMPORTANCE_KEYWORDS['high']:
        if keyword in line:
            score += 30
    for keyword in IMPORTANCE_KEYWORDS['medium']:
        if keyword in line:
            score += 15
    for keyword in IMPORTANCE_KEYWORDS['low']:
        if keyword in line:
            score -= 10
    # Many distinct words suggest a richer, likely more important entry.
    keyword_count = len(re.findall(r'[\u4e00-\u9fff]{2,}|[a-zA-Z]{3,}', line))
    if keyword_count > 5:
        score += 10
    return max(0, min(100, score))


def extract_sections(content):
    """Split MEMORY.md content into its logical sections.

    Returns a dict with keys:
      'header'    - text before the first recognized section (string)
      'important' - list of '-' event lines under '## 🔔 重要事件'
      'daily'     - dict mapping '### <date>' headings to lists of event lines
      'learning'  - list of '-' event lines under '## 📚 学习事件'
      'footer'    - trailing text (string); unused by rebuild_memory_file
    """
    sections = {
        'header': '',
        'important': [],
        'daily': {},
        'learning': [],
        'footer': ''
    }
    lines = content.split('\n')
    current_section = 'header'
    current_date = None

    for line in lines:
        stripped = line.strip()

        # Section-heading detection.
        if '## 🔔 重要事件' in stripped:
            current_section = 'important'
            continue
        elif '## 📅 事件流水' in stripped or '## 📅 最近7天事件流水' in stripped:
            current_section = 'daily'
            continue
        elif '## 📚 学习事件' in stripped:
            current_section = 'learning'
            continue
        elif stripped.startswith('---') and current_section != 'header':
            # First '---' after the header may mark the start of the footer.
            # NOTE(review): this also fires on the separator between sections,
            # so 'footer' mostly collects separators; the rebuild ignores it.
            if sections['footer'] == '':
                current_section = 'footer'

        # Content collection. _is_event_line keeps '---' rules out of the
        # event lists (the original startswith('-') check let them leak in).
        if current_section == 'header':
            sections['header'] += line + '\n'
        elif current_section == 'important' and _is_event_line(stripped):
            sections['important'].append(line)
        elif current_section == 'daily':
            if stripped.startswith('### '):
                current_date = stripped.replace('### ', '').strip()
                if current_date not in sections['daily']:
                    sections['daily'][current_date] = []
            elif _is_event_line(stripped) and current_date:
                sections['daily'][current_date].append(line)
        elif current_section == 'learning' and _is_event_line(stripped):
            sections['learning'].append(line)
        elif current_section == 'footer':
            sections['footer'] += line + '\n'

    return sections


def archive_daily_events(force=False):
    """Archive daily events older than DAYS_TO_KEEP days into monthly files.

    When *force* is True every dated group is archived regardless of age.
    Returns: {'archived': int, 'files': list of archive file names}
    """
    ensure_dirs()
    content = read_memory_file()
    if not content:
        return {'archived': 0, 'files': []}

    sections = extract_sections(content)
    cutoff_date = (datetime.now() - timedelta(days=DAYS_TO_KEEP)).date()

    archived_count = 0
    archived_files = set()
    archive_by_month = {}   # month_key -> {date_str: [event lines]}
    remaining_daily = {}

    for date_str, events in sections['daily'].items():
        date_obj = parse_date_from_line(f"- {date_str}")
        if not date_obj:
            # Undated heading: keep in place, never archive blindly.
            remaining_daily[date_str] = events
            continue
        if date_obj < cutoff_date or force:
            month_key = extract_month_from_date(date_obj)
            if month_key not in archive_by_month:
                archive_by_month[month_key] = {}
            archive_by_month[month_key][date_str] = events
            archived_count += len(events)
            archived_files.add(month_key)
        else:
            remaining_daily[date_str] = events

    # Write one archive file per affected month, merging existing content.
    for month_key, month_data in archive_by_month.items():
        archive_file = DAILY_ARCHIVE_DIR / f"{month_key}.md"
        existing_content = ""
        if archive_file.exists():
            existing_content = archive_file.read_text(encoding='utf-8')
        new_content = generate_daily_archive_content(month_key, month_data, existing_content)
        archive_file.write_text(new_content, encoding='utf-8')

    # Rewrite MEMORY.md only when something actually moved.
    if archived_count > 0:
        sections['daily'] = remaining_daily
        rebuild_memory_file(sections)

    return {
        'archived': archived_count,
        'files': [f"{m}.md" for m in archived_files]
    }


def generate_daily_archive_content(month_key, month_data, existing_content=""):
    """Render the monthly daily-events archive file.

    Dates present in *existing_content* but absent from *month_data* are
    carried over, so re-archiving never drops earlier entries. The caller's
    *month_data* dict is not mutated (the original merged into it in place).
    """
    year, month = month_key.split('-')
    lines = [
        f"# 日常事件归档 - {year}年{int(month)}月",
        "",
        "> 自动归档的日常事件记录",
        f"> 归档时间: {datetime.now().strftime('%Y-%m-%d %H:%M')}",
        "",
        "---",
        ""
    ]

    # Merge into a local copy; new data wins over previously archived data.
    merged = dict(month_data)
    if existing_content:
        for date_str, events in extract_daily_from_archive(existing_content).items():
            if date_str not in merged:
                merged[date_str] = events

    # Newest date first.
    for date_str in sorted(merged.keys(), reverse=True):
        lines.append(f"## {date_str}")
        lines.append("")
        for event in merged[date_str]:
            lines.append(event)
        lines.append("")

    return '\n'.join(lines)


def extract_daily_from_archive(content):
    """Parse a daily archive file back into {date_str: [event lines]}."""
    sections = {}
    current_date = None
    for line in content.split('\n'):
        stripped = line.strip()
        if stripped.startswith('## '):
            current_date = stripped.replace('## ', '').strip()
            if current_date not in sections:
                sections[current_date] = []
        elif _is_event_line(stripped) and current_date:
            sections[current_date].append(line)
    return sections


def archive_major_events():
    """Archive the least important major events once they exceed 30 entries.

    Keeps the MAX_MAJOR_EVENTS highest-scoring lines in MEMORY.md and moves
    the rest into monthly files.
    Returns: {'archived': int, 'files': list}
    """
    ensure_dirs()
    content = read_memory_file()
    if not content:
        return {'archived': 0, 'files': []}

    sections = extract_sections(content)
    important_events = sections['important']
    if len(important_events) <= MAX_MAJOR_EVENTS:
        return {'archived': 0, 'files': []}

    # Rank by importance; archive everything past the top MAX_MAJOR_EVENTS.
    scored_events = [(event, get_importance_score(event)) for event in important_events]
    scored_events.sort(key=lambda x: x[1], reverse=True)
    keep_events = [e[0] for e in scored_events[:MAX_MAJOR_EVENTS]]
    archive_events = [e[0] for e in scored_events[MAX_MAJOR_EVENTS:]]

    # Group archived lines by the month of their embedded date.
    archive_by_month = {}
    for event in archive_events:
        date_obj = parse_date_from_line(event)
        if date_obj:
            month_key = extract_month_from_date(date_obj)
        else:
            # Undated line: file it under the current month.
            month_key = datetime.now().strftime('%Y-%m')
        archive_by_month.setdefault(month_key, []).append(event)

    archived_files = set()
    for month_key, events in archive_by_month.items():
        archive_file = MAJOR_ARCHIVE_DIR / f"{month_key}.md"
        append_to_major_archive(archive_file, month_key, events)
        archived_files.add(month_key)

    sections['important'] = keep_events
    rebuild_memory_file(sections)

    return {
        'archived': len(archive_events),
        'files': [f"{m}.md" for m in archived_files]
    }


def _append_events_archive(archive_file, month_key, events, title, subtitle):
    """Merge *events* into a monthly flat-list archive file.

    Shared implementation for the major/learning archives (the originals were
    byte-identical duplicates). Existing event lines are re-read from the file
    and de-duplicated against the new ones by stripped text.
    """
    year, month = month_key.split('-')

    existing_events = []
    if archive_file.exists():
        content = archive_file.read_text(encoding='utf-8')
        # _is_event_line keeps the file's own '---' separator from being
        # re-ingested as an event on every append.
        existing_events = [line for line in content.split('\n')
                           if _is_event_line(line.strip())]

    seen = set()
    unique_events = []
    for e in existing_events + events:
        key = e.strip()
        if key not in seen:
            seen.add(key)
            unique_events.append(e)

    lines = [
        f"# {title} - {year}年{int(month)}月",
        "",
        subtitle,
        f"> 归档时间: {datetime.now().strftime('%Y-%m-%d %H:%M')}",
        "",
        "---",
        ""
    ]
    lines.extend(unique_events)
    archive_file.write_text('\n'.join(lines), encoding='utf-8')


def append_to_major_archive(archive_file, month_key, events):
    """Append major events to the monthly major-events archive file."""
    _append_events_archive(archive_file, month_key, events,
                           "重要事件归档", "> 自动归档的重要事件记录")


def append_to_learning_archive(archive_file, month_key, events):
    """Append learning events to the monthly learning archive file."""
    _append_events_archive(archive_file, month_key, events,
                           "学习事件归档", "> 自动归档的学习记录")


def archive_learning_events():
    """Archive the least important learning events once they exceed 30 entries.

    Same ranking logic as archive_major_events, targeting the learning section.
    Returns: {'archived': int, 'files': list}
    """
    ensure_dirs()
    content = read_memory_file()
    if not content:
        return {'archived': 0, 'files': []}

    sections = extract_sections(content)
    learning_events = sections['learning']
    if len(learning_events) <= MAX_LEARNING_EVENTS:
        return {'archived': 0, 'files': []}

    scored_events = [(event, get_importance_score(event)) for event in learning_events]
    scored_events.sort(key=lambda x: x[1], reverse=True)
    keep_events = [e[0] for e in scored_events[:MAX_LEARNING_EVENTS]]
    archive_events = [e[0] for e in scored_events[MAX_LEARNING_EVENTS:]]

    archive_by_month = {}
    for event in archive_events:
        date_obj = parse_date_from_line(event)
        if date_obj:
            month_key = extract_month_from_date(date_obj)
        else:
            month_key = datetime.now().strftime('%Y-%m')
        archive_by_month.setdefault(month_key, []).append(event)

    archived_files = set()
    for month_key, events in archive_by_month.items():
        archive_file = LEARNING_ARCHIVE_DIR / f"{month_key}.md"
        append_to_learning_archive(archive_file, month_key, events)
        archived_files.add(month_key)

    sections['learning'] = keep_events
    rebuild_memory_file(sections)

    return {
        'archived': len(archive_events),
        'files': [f"{m}.md" for m in archived_files]
    }


def size_control_archive():
    """Shrink MEMORY.md below the size limit by archiving daily events.

    Oldest and least important daily events are archived first until the
    estimated size drops to 80% of MAX_SIZE_BYTES.
    Returns: {'archived': int, 'files': list}
    """
    size = get_file_size()
    if size <= MAX_SIZE_BYTES:
        return {'archived': 0, 'files': []}

    ensure_dirs()
    content = read_memory_file()
    if not content:
        return {'archived': 0, 'files': []}

    sections = extract_sections(content)

    # Flatten every daily event with its importance score.
    all_daily = []
    for date_str, events in sections['daily'].items():
        for event in events:
            all_daily.append((date_str, event, get_importance_score(event)))

    # Oldest date first, then least important first; undated events sort as
    # "today" so they are archived last.
    all_daily.sort(key=lambda x: (parse_date_from_line(x[1]) or datetime.now().date(), x[2]))

    # Archive until the estimated size reaches 80% of the limit (20% headroom).
    target_size = MAX_SIZE_BYTES * 0.8
    current_size = size
    to_archive = []
    for date_str, event, score in all_daily:
        if current_size <= target_size:
            break
        to_archive.append((date_str, event))
        current_size -= len(event.encode('utf-8')) + 1  # +1 for the newline

    if not to_archive:
        return {'archived': 0, 'files': []}

    # Group the archived events by month and date.
    archive_by_month = {}
    for date_str, event in to_archive:
        date_obj = parse_date_from_line(event)
        if date_obj:
            month_key = extract_month_from_date(date_obj)
        else:
            month_key = datetime.now().strftime('%Y-%m')
        month_bucket = archive_by_month.setdefault(month_key, {})
        month_bucket.setdefault(date_str, []).append(event)

    archived_files = set()
    for month_key, month_data in archive_by_month.items():
        archive_file = DAILY_ARCHIVE_DIR / f"{month_key}.md"
        existing_content = ""
        if archive_file.exists():
            existing_content = archive_file.read_text(encoding='utf-8')
        new_content = generate_daily_archive_content(month_key, month_data, existing_content)
        archive_file.write_text(new_content, encoding='utf-8')
        archived_files.add(month_key)

    # Drop the archived events from MEMORY.md (match on stripped text).
    archived_set = set((d, e.strip()) for d, e in to_archive)
    new_daily = {}
    for date_str, events in sections['daily'].items():
        new_events = [e for e in events if (date_str, e.strip()) not in archived_set]
        if new_events:
            new_daily[date_str] = new_events
    sections['daily'] = new_daily
    rebuild_memory_file(sections)

    return {
        'archived': len(to_archive),
        'files': [f"{m}.md" for m in archived_files]
    }


def rebuild_memory_file(sections):
    """Regenerate MEMORY.md from parsed *sections* and write it to disk."""
    today = datetime.now().strftime('%Y-%m-%d')

    important_content = '\n'.join(sections['important']) if sections['important'] else ""

    # Daily events, newest date first; a placeholder entry if there are none.
    daily_sections = []
    for date_str in sorted(sections['daily'].keys(), reverse=True):
        daily_sections.append(f"\n### {date_str}\n")
        for event in sections['daily'][date_str]:
            daily_sections.append(f"{event}\n")
    daily_content = ''.join(daily_sections) if daily_sections else f"\n### {today}\n\n- {today} --:-- | 暂无记录 | --\n"

    learning_content = '\n'.join(sections['learning']) if sections['learning'] else ""

    total_events = (len(sections['important'])
                    + sum(len(e) for e in sections['daily'].values())
                    + len(sections['learning']))
    # NOTE(review): size is sampled before the rewrite, so the footer reports
    # the previous on-disk size, not the size of the content written below.
    size_kb = round(get_file_size() / 1024, 2)

    new_content = f"""# MEMORY.md - 热记忆 / 活跃记忆

> 本文件记录近期发生的重要事情,详细信息可通过记忆检索获取。

---

## 🔔 重要事件

> 记录具有全局长期性影响的重要决策和事件。
> 添加重要事件时会告知用户。

{important_content}

---

## 📅 事件流水

> 按天分组,每天主要事情的概要。
> 所有记录永久保留,可使用 memory-md-archive 技能归档瘦身。
{daily_content}
---

## 📚 学习事件

> 记录从陷阱和教训中学习的经验。
> 所有学习记录永久保留,可使用 memory-md-archive 技能归档瘦身。

{learning_content}

---

*文件大小: ~{size_kb:.1f}KB | 事件数: {total_events}*
*维护脚本: `memory-md-hot/scripts/daily_maintenance.py`*
*归档提示: 文件较大时请使用 memory-md-archive 技能归档*
"""

    write_memory_file(new_content)


def archive_all(force_size_control=False):
    """Run every archive pass (daily, major, learning, size control) in order.

    Returns a dict of per-pass counts, file lists, the resulting file size,
    and human-readable messages.
    """
    results = {
        'daily_archived': 0,
        'daily_files': [],
        'major_archived': 0,
        'major_files': [],
        'learning_archived': 0,
        'learning_files': [],
        'size_control_archived': 0,
        'size_control_files': [],
        'current_size_kb': 0,
        'messages': []
    }

    # 1. Daily events past the retention window.
    daily_result = archive_daily_events()
    results['daily_archived'] = daily_result['archived']
    results['daily_files'] = daily_result['files']
    if daily_result['archived'] > 0:
        results['messages'].append(f"日常事件归档: {daily_result['archived']} 条到 {daily_result['files']}")

    # 2. Overflowing major events.
    major_result = archive_major_events()
    results['major_archived'] = major_result['archived']
    results['major_files'] = major_result['files']
    if major_result['archived'] > 0:
        results['messages'].append(f"重要事件归档: {major_result['archived']} 条到 {major_result['files']}")

    # 3. Overflowing learning events.
    learning_result = archive_learning_events()
    results['learning_archived'] = learning_result['archived']
    results['learning_files'] = learning_result['files']
    if learning_result['archived'] > 0:
        results['messages'].append(f"学习事件归档: {learning_result['archived']} 条到 {learning_result['files']}")

    # 4. Size control, only when needed (or forced).
    if force_size_control or get_file_size() > MAX_SIZE_BYTES:
        size_result = size_control_archive()
        results['size_control_archived'] = size_result['archived']
        results['size_control_files'] = size_result['files']
        if size_result['archived'] > 0:
            results['messages'].append(f"体积控制归档: {size_result['archived']} 条到 {size_result['files']}")

    results['current_size_kb'] = round(get_file_size() / 1024, 2)
    return results


def check_size():
    """Return MEMORY.md size status relative to the 4KB limit."""
    size = get_file_size()
    return {
        'size_bytes': size,
        'size_kb': round(size / 1024, 2),
        'limit_bytes': MAX_SIZE_BYTES,
        'limit_kb': 4,
        'exceeded': size > MAX_SIZE_BYTES,
        'percentage': round((size / MAX_SIZE_BYTES) * 100, 1)
    }


def get_stats():
    """Return current event counts, size status, archive-file counts,
    and which archive passes are currently needed."""
    sections = extract_sections(read_memory_file())

    daily_count = sum(len(e) for e in sections['daily'].values())
    important_count = len(sections['important'])
    learning_count = len(sections['learning'])
    size_status = check_size()

    archive_stats = {
        'daily': len(list(DAILY_ARCHIVE_DIR.glob('*.md'))) if DAILY_ARCHIVE_DIR.exists() else 0,
        'major': len(list(MAJOR_ARCHIVE_DIR.glob('*.md'))) if MAJOR_ARCHIVE_DIR.exists() else 0,
        'learning': len(list(LEARNING_ARCHIVE_DIR.glob('*.md'))) if LEARNING_ARCHIVE_DIR.exists() else 0
    }

    cutoff = (datetime.now() - timedelta(days=DAYS_TO_KEEP)).date()

    def _is_stale(date_heading):
        # Parse once per heading (the original parsed each date twice).
        date_obj = parse_date_from_line(f"- {date_heading}")
        return date_obj is not None and date_obj < cutoff

    return {
        'current_events': {
            'daily': daily_count,
            'important': important_count,
            'learning': learning_count,
            'total': daily_count + important_count + learning_count
        },
        'size': size_status,
        'archive_files': archive_stats,
        'needs_archive': {
            'daily': any(_is_stale(d) for d in sections['daily']),
            'major': important_count > MAX_MAJOR_EVENTS,
            'learning': learning_count > MAX_LEARNING_EVENTS,
            'size': size_status['exceeded']
        }
    }


def list_archives():
    """List every archive file per category with its size in KB."""
    ensure_dirs()
    result = {
        'daily': [],
        'major': [],
        'learning': []
    }
    for key, directory in (('daily', DAILY_ARCHIVE_DIR),
                           ('major', MAJOR_ARCHIVE_DIR),
                           ('learning', LEARNING_ARCHIVE_DIR)):
        if directory.exists():
            for f in sorted(directory.glob('*.md')):
                result[key].append({
                    'file': f.name,
                    'size_kb': round(f.stat().st_size / 1024, 2)
                })
    return result


def main():
    """CLI entry point: dispatch on flags to stats / listing / archive passes."""
    parser = argparse.ArgumentParser(description='归档 MEMORY.md 内容')
    parser.add_argument('--daily-only', action='store_true', help='仅归档日常事件')
    parser.add_argument('--major-only', action='store_true', help='仅归档重要事件')
    parser.add_argument('--learning-only', action='store_true', help='仅归档学习事件')
    parser.add_argument('--force-size-control', action='store_true', help='强制执行体积控制')
    parser.add_argument('--stats', action='store_true', help='显示统计信息')
    parser.add_argument('--list-archives', action='store_true', help='列出归档文件')
    args = parser.parse_args()

    if args.stats:
        stats = get_stats()
        print("📊 MEMORY.md 统计信息")
        print(f"\n当前事件数量:")
        print(f"  - 日常事件: {stats['current_events']['daily']}")
        print(f"  - 重要事件: {stats['current_events']['important']}")
        print(f"  - 学习事件: {stats['current_events']['learning']}")
        print(f"  - 总计: {stats['current_events']['total']}")
        print(f"\n文件大小:")
        print(f"  - 当前: {stats['size']['size_kb']}KB ({stats['size']['percentage']}%)")
        print(f"  - 限制: {stats['size']['limit_kb']}KB")
        print(f"  - 状态: {'⚠️ 超限' if stats['size']['exceeded'] else '✅ 正常'}")
        print(f"\n归档文件数量:")
        print(f"  - 日常事件: {stats['archive_files']['daily']} 个")
        print(f"  - 重要事件: {stats['archive_files']['major']} 个")
        print(f"  - 学习事件: {stats['archive_files']['learning']} 个")
        print(f"\n归档需求:")
        print(f"  - 日常事件: {'需要' if stats['needs_archive']['daily'] else '无需'}")
        print(f"  - 重要事件: {'需要' if stats['needs_archive']['major'] else '无需'}")
        print(f"  - 学习事件: {'需要' if stats['needs_archive']['learning'] else '无需'}")
        print(f"  - 体积控制: {'需要' if stats['needs_archive']['size'] else '无需'}")
        return

    if args.list_archives:
        archives = list_archives()
        print("📁 归档文件列表")
        print(f"\n日常事件归档 (memory/archive-daily/):")
        for a in archives['daily']:
            print(f"  - {a['file']} ({a['size_kb']}KB)")
        print(f"\n重要事件归档 (memory/archive-major/):")
        for a in archives['major']:
            print(f"  - {a['file']} ({a['size_kb']}KB)")
        print(f"\n学习事件归档 (memory/archive-learning/):")
        for a in archives['learning']:
            print(f"  - {a['file']} ({a['size_kb']}KB)")
        return

    # Archive passes.
    if args.daily_only:
        result = archive_daily_events()
        print(f"✅ 日常事件归档完成")
        print(f"  - 归档数量: {result['archived']} 条")
        print(f"  - 归档文件: {result['files']}")
    elif args.major_only:
        result = archive_major_events()
        print(f"✅ 重要事件归档完成")
        print(f"  - 归档数量: {result['archived']} 条")
        print(f"  - 归档文件: {result['files']}")
    elif args.learning_only:
        result = archive_learning_events()
        print(f"✅ 学习事件归档完成")
        print(f"  - 归档数量: {result['archived']} 条")
        print(f"  - 归档文件: {result['files']}")
    else:
        result = archive_all(force_size_control=args.force_size_control)
        print(f"✅ 归档完成")
        print(f"\n归档结果:")
        print(f"  - 日常事件: {result['daily_archived']} 条")
        print(f"  - 重要事件: {result['major_archived']} 条")
        print(f"  - 学习事件: {result['learning_archived']} 条")
        print(f"  - 体积控制: {result['size_control_archived']} 条")
        print(f"\n当前文件大小: {result['current_size_kb']}KB")
        if result['messages']:
            print(f"\n详细信息:")
            for msg in result['messages']:
                print(f"  - {msg}")


if __name__ == "__main__":
    main()