sdk/qywx-sdk/wechat_file_handler.py

485 lines
20 KiB
Python
Raw Normal View History

2025-08-18 09:05:41 +00:00
import base64
import json
import os
import time
import random
import hashlib
import ctypes
from datetime import datetime
import Crypto
from django.core.cache import cache
from url_md_handle import url_to_markdown
import ctypes
import json
import base64
import time
from ctypes import c_void_p, c_char_p, c_uint, c_ulonglong, c_int
from Crypto.Cipher import PKCS1_v1_5
from Crypto.PublicKey import RSA
from Crypto.PublicKey import RSA
import config # 导入配置
def gen_real_file_path():
"""生成本地文件路径和对应URL"""
auto_path = f"{datetime.now().year}/{datetime.now().month}/{datetime.now().day}"
real_path = os.path.join(config.BASE_UPLOADS, auto_path)
if not os.path.exists(real_path):
os.makedirs(real_path)
filename = time.strftime("%H%M%S", time.localtime(time.time())) + str(random.randint(100, 999))
filename = hashlib.md5(filename.encode("utf-8")).hexdigest()
real_filepath = os.path.join(real_path, filename)
url_filepath = f"{config.UPLOADS_URL}/{auto_path}/{filename}"
return real_filepath, url_filepath
def get_document_file_type(file_name):
"""根据文件扩展名判断类型"""
file_type = "other"
file_split = os.path.splitext(file_name)
if len(file_split) == 2:
ext = file_split[1].lower()
if ext in [".txt", ".md", ".markdown"]:
file_type = "txt" if ext == ".txt" else "md"
elif ext in [".java", ".py", ".c", ".cpp", ".sql", ".sh", ".bat", ".js", ".css", ".ts", ".tsx", ".jsx", ".vue"]:
file_type = "code"
elif ext in [".jpg", ".jpeg", ".png", ".bmp", ".gif"]:
file_type = "image"
elif ext in [".mp4", ".mov", ".m3u8", ".webm", ".ogv"]:
file_type = "video"
elif ext in [".mp3", ".wav", ".ogg", ".amr"]:
file_type = "sound"
return file_type
def get_text_content(file_path):
"""读取文本内容"""
try:
with open(file_path, "r", encoding="utf-8") as f:
return f.read()
except Exception as e:
print(f"--> get_text_content 异常: {e}")
return None
class WeWorkFinanceSDK:
def __init__(self, dll_path: str):
self._dll = ctypes.cdll.LoadLibrary(dll_path)
# --- 返回类型 ---
self._dll.NewSdk.restype = c_void_p
self._dll.NewSlice.restype = c_void_p
# --- 函数签名(关键:避免 OverflowError ---
self._dll.Init.argtypes = [c_void_p, c_char_p, c_char_p]
self._dll.Init.restype = c_int
self._dll.DestroySdk.argtypes = [c_void_p]
self._dll.GetChatData.argtypes = [
c_void_p, # sdk
c_ulonglong, # seq
c_uint, # limit
c_char_p, # proxy
c_char_p, # passwd
c_uint, # timeout
c_void_p # slice
]
self._dll.GetChatData.restype = c_int
self._dll.GetContentFromSlice.argtypes = [c_void_p]
self._dll.GetContentFromSlice.restype = c_char_p
self._dll.FreeSlice.argtypes = [c_void_p]
self._dll.DecryptData.argtypes = [
c_char_p, # decrypt_key用RSA解出来的随机密钥字符串
c_char_p, # encrypt_chat_msgBase64字符串
c_void_p # out slice
]
self._dll.DecryptData.restype = c_int
# 可选:媒体相关(这里不演示下载)
# self._dll.NewMediaData.restype = c_void_p
# ...
# 创建 SDK 实例
self.sdk = self._dll.NewSdk()
if not self.sdk:
raise RuntimeError("NewSdk 返回空指针")
def init(self, corpid: bytes, secret: bytes):
ret = self._dll.Init(self.sdk, corpid, secret)
if ret != 0:
raise RuntimeError(f"SDK 初始化失败,错误码: {ret}")
print("SDK 初始化成功")
def destroy(self):
if self.sdk:
self._dll.DestroySdk(self.sdk)
self.sdk = None
print("SDK 已销毁")
def new_slice(self):
return self._dll.NewSlice()
def free_slice(self, slice_ptr):
self._dll.FreeSlice(slice_ptr)
def slice_to_str(self, slice_ptr) -> str:
data = self._dll.GetContentFromSlice(slice_ptr)
return data.decode("utf-8") if data else ""
def get_chat_data(self, seq: int, limit: int = 1000, timeout_sec: int = 5,
proxy: bytes | None = None, passwd: bytes | None = None) -> dict:
sp = self.new_slice()
try:
ret = self._dll.GetChatData(
self.sdk,
c_ulonglong(seq),
c_uint(limit),
proxy, # None 会变成 NULL
passwd, # None 会变成 NULL
c_uint(timeout_sec),
sp
)
if ret != 0:
raise RuntimeError(f"GetChatData 失败,错误码: {ret}")
raw = self.slice_to_str(sp)
return json.loads(raw) if raw else {}
finally:
self.free_slice(sp)
def decrypt_data(self, decrypt_key_utf8: bytes, encrypt_chat_msg_b64: bytes) -> dict:
"""
SDK 解密 encrypt_chat_msg必须传 UTF-8 decrypt_key 文本 & Base64 的密文
"""
sp = self.new_slice()
try:
ret = self._dll.DecryptData(decrypt_key_utf8, encrypt_chat_msg_b64, sp)
if ret != 0:
raise RuntimeError(f"DecryptData 失败,错误码: {ret}")
raw = self.slice_to_str(sp)
return json.loads(raw) if raw else {}
finally:
self.free_slice(sp)
class WeChatWorkHandler:
def __init__(self, sdk, wx_work):
self.sdk = sdk
self.wx_work = wx_work
from datetime import datetime
def _get_datetime_str(self, timestamp):
# timestamp 是13位整数, 由于精确到秒, 所以只需要10位除以1000
new_timestamp = timestamp
if (9999999999 < new_timestamp):
new_timestamp = timestamp / 1000
# 计算
localtime = time.localtime(new_timestamp)
datetime_str = time.strftime("%Y-%m-%d %H:%M:%S", localtime)
return datetime_str
def _get_media_file(self, sdkfileid, save_filename):
# 初始化媒体文件拉取的buf
# 媒体文件每次拉取的最大size为512k因此超过512k的文件需要分片拉取。若该文件未拉取完整sdk的IsMediaDataFinish接口会返回0同时通过GetOutIndexBuf接口返回下次拉取需要传入GetMediaData的indexbuf。
# indexbuf一般格式如右侧所示”Range: bytes = 524288 - 1048575“表示这次拉取的是从524288到1048575的分片。单个文件首次拉取填写的indexbuf为空字符串拉取后续分片时直接填入上次返回的indexbuf即可。
indexbuf = ""
while True:
media_data = self.wx_work.NewMediaData()
ret = self.wx_work.GetMediaData(self.sdk, indexbuf, sdkfileid, "", "", 5, media_data)
if (0 != ret):
print("-- GetMediaData --> 获取失败:", ret)
self.wx_work.FreeMediaData(media_data)
break
# 用msgid作为文件名这里根据需求制定文件名
# save_filename = os.getcwd() + "/images/" + result["msgid"] + file_ext
# ab为二进制文件追加读写用于分片写入不会覆盖前面已经写入的
with open(save_filename, 'ab') as f:
# 这里注意ctypes.string_at的参数addresssize,size就是分片长度得到 bytes
res = ctypes.string_at(self.wx_work.GetData(media_data), self.wx_work.GetDataLen(media_data))
f.write(res)
# 拉取完最后一个分片
if (1 == self.wx_work.IsMediaDataFinish(media_data)):
self.wx_work.FreeMediaData(media_data)
break
else:
# 获取下一个分片的indexbuf
indexbuf = self.wx_work.GetOutIndexBuf(media_data)
# 返回
return True
def _handle_sync_msg_file(self, seqNo, msgId, msgTime, msgFrom, msgType, msgObj, isSaveDatabase=False):
title = ""
file_ext = ""
file_type = ""
file_size = 0
file_content = None
sdkfileid = msgObj.get("sdkfileid")
datetime_str = self._get_datetime_str(msgTime)
if ("video" == msgType):
file_ext = ".mp4"
title = "视频_" + datetime_str.replace(" ", "").replace(":", "") + file_ext
elif ("voice" == msgType):
file_ext = ".amr"
title = "语音_" + datetime_str.replace(" ", "").replace(":", "") + file_ext
elif ("image" == msgType):
file_ext = ".png"
title = "照片_" + datetime_str.replace(" ", "").replace(":", "") + file_ext
elif ("file" == msgType):
file_ext = "." + msgObj.get("fileext")
title = msgObj.get("filename")
# 拉取媒体文件
(real_save_path, url_save_path) = gen_real_file_path()
real_save_path = real_save_path + file_ext
url_save_path = url_save_path + file_ext
self._get_media_file(sdkfileid, real_save_path)
file_size = os.path.getsize(real_save_path)
# 获取类型
file_type = get_document_file_type(url_save_path)
if ("md" == file_type):
file_content = get_text_content(real_save_path)
elif ("txt" == file_type):
file_content = get_text_content(real_save_path)
elif ("code" == file_type):
file_content = get_text_content(real_save_path)
# # 保存到数据库
# if isSaveDatabase:
# self._save_database(title, file_type, url_save_path, None, file_content, file_size, file_ext, msgFrom, datetime_str)
# 返回
resData = {
"title": title,
"file_type": file_type,
"url_save_path": url_save_path,
"real_save_path": real_save_path,
"file_content": file_content,
"file_size": file_size,
"file_ext": file_ext,
"datetime_str": datetime_str,
}
return resData
# 处理文本类型
def _handle_sync_msg_text(self, seqNo, msgId, msgTime, msgFrom, msgType, msgObj, isSaveDatabase=False):
content = msgObj.get("content")
datetime_str = self._get_datetime_str(msgTime)
print(f"-- {msgType} --> msgFrom={msgFrom}, content={content}")
# 调用 云文档 --> 创建 markdown 文件
# if isSaveDatabase:
# self._save_database(content[:50], "md", None, None, content, len(content), ".md", msgFrom, datetime_str)
# 返回
resData = {
"content": content,
"datetime_str": datetime_str,
}
return resData
# 处理链接
def _handle_sync_msg_link(self, seqNo, msgId, msgTime, msgFrom, msgType, msgObj, isSaveDatabase=False):
file_content = ""
title = msgObj.get("title")
description = msgObj.get("description")
link_url = msgObj.get("link_url")
image_url = msgObj.get("image_url")
datetime_str = self._get_datetime_str(msgTime)
print(f"-- {msgType} --> title={title}, description={description}, link_url={link_url}, image_url={image_url}")
if (description and (0<len(description))):
file_content = "> 摘要描述\n> " + description + "\n\n---\n\n"
# 根据 link_url 爬取文章内容
(md_title, md_content) = url_to_markdown(link_url)
file_content = file_content + md_content
# # 保存到数据库
# if isSaveDatabase:
# self._save_database(title, "url", None, link_url, file_content, len(file_content), ".url", msgFrom, datetime_str)
# 返回
resData = {
"title": title,
"description": description,
"link_url": link_url,
"image_url": image_url,
"file_content": file_content,
"datetime_str": datetime_str,
}
return resData
# 处理位置
def _handle_sync_msg_location(self, seqNo, msgId, msgTime, msgFrom, msgType, msgObj, isSaveDatabase=False):
title = msgObj.get("title")
address = msgObj.get("address")
longitude = msgObj.get("longitude")
latitude = msgObj.get("latitude")
zoom = msgObj.get("zoom")
datetime_str = self._get_datetime_str(msgTime)
print(f"-- {msgType} --> title={title}, address={address}, longitude={longitude}, latitude={latitude}, zoom={zoom}")
file_content = f"{title}\n地址:{address}\n精度:{longitude}\n维度:{latitude}"
# # 保存到数据库
# if isSaveDatabase:
# self._save_database(title, "md", None, None, file_content, len(file_content), ".md", msgFrom, datetime_str)
# 返回
resData = {
"title": title,
"address": address,
"longitude": longitude,
"latitude": latitude,
"file_content": file_content,
"datetime_str": datetime_str,
}
return resData
# 处理消息记录
def _handle_sync_msg_mix(self, seqNo, msgId, msgTime, msgFrom, msgType, msgObj, isSaveDatabase=False):
datetime_str = self._get_datetime_str(msgTime)
title = msgObj.get("title")
items = msgObj.get("item")
file_content = "#### " + title
for item in items:
item_type = item.get("type")
item_msgtime = item.get("msgtime")
item_content = item.get("content")
item_datetime_str = self._get_datetime_str(item_msgtime)
if (item_content and (0 < len(item_content))):
item_content_obj = json.loads(item_content)
if ("ChatRecordText" == item_type):
# 处理文本类型
resData = self._handle_sync_msg_text(None, None, item_msgtime, None, "text", item_content_obj, False)
tmp_content = resData.get("content")
elif ("ChatRecordImage" == item_type):
# 处理文件
resData = self._handle_sync_msg_file(None, None, item_msgtime, None, "image", item_content_obj, False)
tmp_content = f'![{resData.get("title")}]({resData.get("url_save_path")})'
elif ("ChatRecordVideo" == item_type):
# 处理文件
resData = self._handle_sync_msg_file(None, None, item_msgtime, None, "video", item_content_obj, False)
# tmp_content = resData.get("title") + "\n视频链接" + resData.get("url_save_path")
tmp_content = f'!video[{resData.get("title")}]({resData.get("url_save_path")})'
elif ("ChatRecordFile" == item_type):
# 处理文件
resData = self._handle_sync_msg_file(None, None, item_msgtime, None, "file", item_content_obj, False)
tmp_content = resData.get("title") + "\n文件链接:" + resData.get("url_save_path")
elif ("ChatRecordLink" == item_type):
# 处理链接
resData = self._handle_sync_msg_link(None, None, item_msgtime, None, "link", item_content_obj, False)
tmp_content = resData.get("title") + "\n原文链接:" + resData.get("link_url")
elif ("ChatRecordLocation" == item_type):
# 处理位置
resData = self._handle_sync_msg_location(None, None, item_msgtime, None, "location", item_content_obj, False)
tmp_content = resData.get("file_content")
elif ("ChatRecordMixed" == item_type):
# 处理消息记录
resData = self._handle_sync_msg_mix(None, None, item_msgtime, None, None, item_content_obj, False)
tmp_content = "---\n\n" + resData.get("file_content") + "\n\n---"
else:
tmp_content = "[该消息类型暂不能展示]"
# 构造内容
file_content = file_content + "\n\n" + "> " + item_datetime_str + "\n" + tmp_content
# 显示
print(f"----> item_type={item_type}, item_msgtime={item_msgtime}, item_content={item_content}")
print(f"----> file_content={file_content}")
# 保存到数据库
# if isSaveDatabase:
# self._save_database(title, "md", None, None, file_content, len(file_content), ".md", msgFrom, datetime_str)
# 返回
resData = {
"title": title,
"file_content": file_content,
"datetime_str": datetime_str,
}
return resData
def sync_msg(self):
private_key = RSA.importKey(self.PRI_KEY)
cipher = PKCS1_v1_5.new(private_key)
slice_ptr = self.wx_work.NewSlice()
print("-------------------------------",self.seq)
ret = self.wx_work.GetChatData(self.sdk, self.seq, 1000, None, None, 5, slice_ptr)
if ret != 0:
print("GetChatData 错误:", ret)
return
origin_data = self.wx_work.GetContentFromSlice(slice_ptr)
self.wx_work.FreeSlice(slice_ptr)
chat_data = json.loads(origin_data).get("chatdata", [])
if not chat_data:
print("没有新消息")
return
for msg in chat_data:
try:
# RSA 解密随机密钥
encrypt_key_bytes = cipher.decrypt(
base64.b64decode(msg.get("encrypt_random_key")), None
)
encrypt_key_str = encrypt_key_bytes.decode("utf-8")
# 解密消息
slice_ptr = self.wx_work.NewSlice()
decrypt_ret = self.wx_work.DecryptData(
encrypt_key_str.encode("utf-8"),
msg.get("encrypt_chat_msg").encode("utf-8"),
slice_ptr
)
if decrypt_ret != 0:
print(f"DecryptData 错误: {decrypt_ret}, msgid={msg.get('msgid')}")
continue
result_json = self.wx_work.GetContentFromSlice(slice_ptr)
self.wx_work.FreeSlice(slice_ptr)
result = json.loads(result_json)
# 更新 seq
self.seq = msg.get("seq", self.seq)
seqNo = msg.get("seq")
msgId = result.get("msgid")
msgTime = result.get("msgtime")
msgFrom = result.get("from")
msgType = result.get("msgtype")
msgObj = result.get(msgType)
# if (("video" == msgType) or ("voice" == msgType) or ("image" == msgType) or ("file" == msgType)):
# # 处理图片、音频、视频、文件
# self._handle_sync_msg_file(seqNo, msgId, msgTime, msgFrom, msgType, msgObj, True)
print(seqNo)
if ("text" == msgType):
self._handle_sync_msg_text(seqNo, msgId, msgTime, msgFrom, msgType, msgObj, True)
elif ("link" == msgType):
self._handle_sync_msg_link(seqNo, msgId, msgTime, msgFrom, msgType, msgObj, True)
elif ("location" == msgType):
self._handle_sync_msg_location(seqNo, msgId, msgTime, msgFrom, msgType, msgObj, True)
elif ("chatrecord" == msgType):
self._handle_sync_msg_mix(seqNo, msgId, msgTime, msgFrom, msgType, msgObj, True)
except Exception as e:
print("处理消息异常:", e, "msg=", msg)
# 释放 SDK
self.wx_work.DestroySdk(self.sdk)
# ====== 使用示例 ======
if __name__ == "__main__":
DLL_PATH = r"D:\pyproject\box\qywx-sdk\windows\C_sdk\FinanceSdkDemo\WeWorkFinanceSdk.dll"
CORP_ID = b"wwed4ac0dd4e1d398d"
SECRET = b"ZPZPpW0tMaIRxhAcywO_lW21ACDUiEBkxJwgrRFRdfg"
# 1⃣ 初始化 SDK
sdk_obj = WeWorkFinanceSDK(DLL_PATH)
sdk_obj.init(CORP_ID, SECRET)
# 2⃣ 加载 RSA 私钥
with open("private.pem", "rb") as f:
PRI_KEY = f.read()
# 3⃣ 初始化 Handler
wx_handler = WeChatWorkHandler(sdk_obj.sdk, sdk_obj._dll)
wx_handler.PRI_KEY = PRI_KEY
# 4⃣ 设置起始 seq
wx_handler.seq = 0
# 5⃣ 同步消息
wx_handler.sync_msg()