def decrypt_to_jsonl(self, excel_file=None, output_file=None):
    """Decrypt the chat records listed in an Excel sheet into a JSONL file.

    Each row's ``encrypt_key`` / ``encrypt_chat_msg`` pair is passed to the
    external tool at ``self.sdktools_path`` (invoked as
    ``<tool> 3 <key> <msg>``); the tool's stripped stdout becomes one output
    line. Duplicate input pairs and duplicate outputs are written only once,
    in first-seen order.

    Args:
        excel_file: Path of the input workbook. Defaults to ``chat.xlsx``
            inside ``self.today_dir``.
        output_file: Path of the JSONL file to write. Defaults to
            ``chatdata.jsonl`` inside ``self.today_dir``. An existing file
            at this path is removed first.

    Returns:
        None. If ``excel_file`` does not exist, an error is printed and
        nothing else happens.
    """
    import os

    # Default to chat.xlsx in today's directory.
    if excel_file is None:
        excel_file = os.path.join(self.today_dir, 'chat.xlsx')

    if not os.path.exists(excel_file):
        print(f"错误:找不到 {excel_file}")
        return

    # Imported only after the input check so that the "missing file" path
    # does not depend on the third-party packages being importable.
    import subprocess
    from multiprocessing.pool import ThreadPool

    import pandas as pd
    from tqdm import tqdm

    # Default to writing into today's directory.
    if output_file is None:
        output_file = os.path.join(self.today_dir, 'chatdata.jsonl')

    # Remove any stale output up front so a failed run never leaves old data.
    if os.path.exists(output_file):
        os.remove(output_file)

    print(f"开始加载文件 {excel_file} ……💕")
    df = pd.read_excel(excel_file, engine='openpyxl')

    columns = ['encrypt_key', 'encrypt_chat_msg']
    # Blank cells load as NaN (a float), which subprocess cannot accept as an
    # argument; such rows cannot be decrypted, so drop them. Then de-duplicate
    # the encrypted fields so no pair is decrypted twice.
    df = df.dropna(subset=columns).drop_duplicates(subset=columns)
    pairs = list(zip(df['encrypt_key'].tolist(), df['encrypt_chat_msg'].tolist()))

    print("开始解密聊天记录……💕")
    sdktools_path = self.sdktools_path

    def _decrypt(pair):
        """Run the SDK tool on one (key, message) pair; return its stdout."""
        key, message = pair
        completed = subprocess.run(
            [sdktools_path, '3', key, message],
            stdout=subprocess.PIPE,
        )
        return completed.stdout.decode('utf-8').strip()

    # A thread pool is used instead of a process pool: a nested function
    # cannot be pickled for worker processes, and the work here is waiting on
    # an external process anyway. ``imap`` yields results lazily and in input
    # order, so the progress bar advances as items actually finish.
    with ThreadPool() as pool:
        results = list(tqdm(
            pool.imap(_decrypt, pairs),
            total=len(pairs),
            desc='Processing',
        ))

    # An empty stdout means the tool produced nothing for that record; a blank
    # line is not valid JSONL, so report and skip those.
    skipped = sum(1 for line in results if not line)
    if skipped:
        print(f"警告:{skipped} 条记录解密输出为空,已跳过")

    # De-duplicate again in case distinct inputs decrypt to the same output;
    # dict.fromkeys keeps first-seen order.
    unique_results = [line for line in dict.fromkeys(results) if line]

    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(line + '\n' for line in unique_results)

    print(f"数据处理完成 ✔ 已保存到 {output_file}")