#!/usr/bin/env python3
"""Inspect an .xlsx workbook with the standard library and dump a summary to JSON."""
import json
import os
import re
import sys
import xml.etree.ElementTree as ET
import zipfile

# Where the JSON summary is written when the caller does not choose a path.
DEFAULT_OUTPUT_PATH = '/root/.openclaw/agents/101/workspace/excel_extracted.json'

# Workbook analysed when the script is run without a command-line argument.
DEFAULT_INPUT_PATH = "/root/.openclaw/media/inbound/ä¼_ä_å¾_ä_è_½å_é_ªè_å_æ_å_é---86c8a8c3-f1bc-47fc-88e5-fb1bd39dc7fc.xlsx"


def _local_name(tag):
    """Return an element tag without its ``{namespace}`` prefix."""
    return tag.rsplit('}', 1)[-1]


def _natural_key(name):
    """Sort key that orders embedded numbers numerically (sheet2 before sheet10)."""
    # re.split with a capturing group puts the digit runs at the odd indexes.
    return [
        int(token) if index % 2 else token
        for index, token in enumerate(re.split(r'(\d+)', name))
    ]


def _read_xml_root(archive, member):
    """Parse one archive member as XML and return its root element."""
    with archive.open(member) as handle:
        return ET.parse(handle).getroot()


def _string_item_text(item):
    """Join the visible text of one shared-string or inline-string item.

    Text lives either in a direct ``<t>`` child or in the ``<t>`` children of
    rich-text ``<r>`` runs. Other children (e.g. phonetic ``<rPh>`` runs) are
    ignored so they do not leak into the cell text.
    """
    parts = []
    for child in item:
        kind = _local_name(child.tag)
        if kind == 't':
            parts.append(child.text or '')
        elif kind == 'r':
            parts.extend(
                run.text or '' for run in child if _local_name(run.tag) == 't'
            )
    return ''.join(parts)


def _read_shared_strings(archive, member):
    """Return the shared-string table, exactly one entry per ``<si>`` item.

    Empty items are kept as ``''`` because cells refer to entries by position.
    """
    root = _read_xml_root(archive, member)
    return [_string_item_text(item) for item in root if _local_name(item.tag) == 'si']


def _read_cell_values(archive, member, strings):
    """Return the display value of every cell in *member* that has a value.

    Only cells typed ``t="s"`` are looked up in *strings*; numeric cells keep
    their raw text so a plain number is never mistaken for a string index.
    """
    values = []
    for cell in _read_xml_root(archive, member).iter():
        if _local_name(cell.tag) != 'c':
            continue
        cell_type = cell.get('t')
        raw = None
        for child in cell:
            kind = _local_name(child.tag)
            if kind == 'v':
                raw = child.text
            elif kind == 'is' and cell_type == 'inlineStr':
                raw = _string_item_text(child)
        if not raw:
            # Style-only or empty cells carry nothing to report.
            continue
        if cell_type != 's':
            values.append(raw)
        elif raw.isdecimal() and int(raw) < len(strings):
            values.append(strings[int(raw)])
        else:
            # Dangling reference: show the index instead of failing.
            values.append(f"[索引 {raw}]")
    return values


def extract_excel_content(file_path, output_path=DEFAULT_OUTPUT_PATH):
    """Summarise an .xlsx file and save the summary as JSON.

    The workbook is opened as a zip archive. Its shared-string table and the
    first worksheet are parsed as XML, a short report is printed, and a summary
    dict is written to *output_path*.

    Args:
        file_path: Path of the .xlsx file to analyse.
        output_path: Destination of the JSON summary. Defaults to
            ``DEFAULT_OUTPUT_PATH``.

    Returns:
        A dict with the keys ``file_name``, ``shared_strings``, ``sheet_count``
        and ``sheet_files``, or ``None`` if anything went wrong (the error is
        printed, not raised).
    """
    print("🔍 开始分析Excel文件...")
    print(f"文件: {file_path}")

    try:
        # An .xlsx file is a zip archive of XML parts.
        with zipfile.ZipFile(file_path, 'r') as archive:
            file_list = archive.namelist()
            print(f"\n📁 文件包含 {len(file_list)} 个内部文件")

            # Locate the parts of interest.
            shared_strings = None
            sheet_files = []
            for name in file_list:
                if 'sharedStrings.xml' in name:
                    shared_strings = name
                elif 'sheet' in name and name.endswith('.xml'):
                    sheet_files.append(name)

            # Zip order is arbitrary: put real worksheets first, in numeric
            # order, so that index 0 is the first worksheet.
            sheet_files.sort(
                key=lambda name: ('/worksheets/' not in '/' + name, _natural_key(name))
            )

            print(f"共享字符串文件: {shared_strings}")
            print(f"工作表文件: {sheet_files}")

            # Read the shared-string table, if the workbook has one.
            strings = []
            if shared_strings:
                strings = _read_shared_strings(archive, shared_strings)
                print(f"\n📝 找到 {len(strings)} 个文本字符串")
                if strings:
                    print("示例字符串:")
                    for i, text in enumerate(strings[:10]):
                        print(f" {i+1}. {text}")

            # Report on the first worksheet.
            if sheet_files:
                sheet_file = sheet_files[0]
                cells = _read_cell_values(archive, sheet_file, strings)
                print(f"\n📊 工作表 '{sheet_file}' 分析:")
                print(f"单元格数量: {len(cells)}")
                if cells:
                    print("单元格值示例:")
                    for i, value in enumerate(cells[:20]):
                        print(f" {i+1}. {value}")

        # Persist the extracted content.
        extracted_data = {
            'file_name': os.path.basename(file_path),
            'shared_strings': strings,
            'sheet_count': len(sheet_files),
            'sheet_files': sheet_files,
        }
        with open(output_path, 'w', encoding='utf-8') as out:
            json.dump(extracted_data, out, ensure_ascii=False, indent=2)

        print("\n✅ 内容提取完成！")
        print(f"数据已保存到: {output_path}")
        return extracted_data

    except Exception as e:
        # Deliberately broad: this is the script's best-effort boundary, and a
        # damaged workbook can raise many unrelated error types (BadZipFile,
        # zlib.error, ParseError, OSError, ...). Report and signal with None.
        print(f"❌ 处理文件时出错: {e}")
        return None


if __name__ == "__main__":
    # An optional first argument overrides the built-in default workbook.
    file_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_INPUT_PATH
    extract_excel_content(file_path)