sdk/qywx-sdk/wechat_file_handler.py

485 lines
20 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import base64
import json
import os
import time
import random
import hashlib
import ctypes
from datetime import datetime
import Crypto
from django.core.cache import cache
from url_md_handle import url_to_markdown
import ctypes
import json
import base64
import time
from ctypes import c_void_p, c_char_p, c_uint, c_ulonglong, c_int
from Crypto.Cipher import PKCS1_v1_5
from Crypto.PublicKey import RSA
from Crypto.PublicKey import RSA
import config # 导入配置
def gen_real_file_path():
"""生成本地文件路径和对应URL"""
auto_path = f"{datetime.now().year}/{datetime.now().month}/{datetime.now().day}"
real_path = os.path.join(config.BASE_UPLOADS, auto_path)
if not os.path.exists(real_path):
os.makedirs(real_path)
filename = time.strftime("%H%M%S", time.localtime(time.time())) + str(random.randint(100, 999))
filename = hashlib.md5(filename.encode("utf-8")).hexdigest()
real_filepath = os.path.join(real_path, filename)
url_filepath = f"{config.UPLOADS_URL}/{auto_path}/{filename}"
return real_filepath, url_filepath
def get_document_file_type(file_name):
"""根据文件扩展名判断类型"""
file_type = "other"
file_split = os.path.splitext(file_name)
if len(file_split) == 2:
ext = file_split[1].lower()
if ext in [".txt", ".md", ".markdown"]:
file_type = "txt" if ext == ".txt" else "md"
elif ext in [".java", ".py", ".c", ".cpp", ".sql", ".sh", ".bat", ".js", ".css", ".ts", ".tsx", ".jsx", ".vue"]:
file_type = "code"
elif ext in [".jpg", ".jpeg", ".png", ".bmp", ".gif"]:
file_type = "image"
elif ext in [".mp4", ".mov", ".m3u8", ".webm", ".ogv"]:
file_type = "video"
elif ext in [".mp3", ".wav", ".ogg", ".amr"]:
file_type = "sound"
return file_type
def get_text_content(file_path):
"""读取文本内容"""
try:
with open(file_path, "r", encoding="utf-8") as f:
return f.read()
except Exception as e:
print(f"--> get_text_content 异常: {e}")
return None
class WeWorkFinanceSDK:
def __init__(self, dll_path: str):
self._dll = ctypes.cdll.LoadLibrary(dll_path)
# --- 返回类型 ---
self._dll.NewSdk.restype = c_void_p
self._dll.NewSlice.restype = c_void_p
# --- 函数签名(关键:避免 OverflowError ---
self._dll.Init.argtypes = [c_void_p, c_char_p, c_char_p]
self._dll.Init.restype = c_int
self._dll.DestroySdk.argtypes = [c_void_p]
self._dll.GetChatData.argtypes = [
c_void_p, # sdk
c_ulonglong, # seq
c_uint, # limit
c_char_p, # proxy
c_char_p, # passwd
c_uint, # timeout
c_void_p # slice
]
self._dll.GetChatData.restype = c_int
self._dll.GetContentFromSlice.argtypes = [c_void_p]
self._dll.GetContentFromSlice.restype = c_char_p
self._dll.FreeSlice.argtypes = [c_void_p]
self._dll.DecryptData.argtypes = [
c_char_p, # decrypt_key用RSA解出来的随机密钥字符串
c_char_p, # encrypt_chat_msgBase64字符串
c_void_p # out slice
]
self._dll.DecryptData.restype = c_int
# 可选:媒体相关(这里不演示下载)
# self._dll.NewMediaData.restype = c_void_p
# ...
# 创建 SDK 实例
self.sdk = self._dll.NewSdk()
if not self.sdk:
raise RuntimeError("NewSdk 返回空指针")
def init(self, corpid: bytes, secret: bytes):
ret = self._dll.Init(self.sdk, corpid, secret)
if ret != 0:
raise RuntimeError(f"SDK 初始化失败,错误码: {ret}")
print("SDK 初始化成功")
def destroy(self):
if self.sdk:
self._dll.DestroySdk(self.sdk)
self.sdk = None
print("SDK 已销毁")
def new_slice(self):
return self._dll.NewSlice()
def free_slice(self, slice_ptr):
self._dll.FreeSlice(slice_ptr)
def slice_to_str(self, slice_ptr) -> str:
data = self._dll.GetContentFromSlice(slice_ptr)
return data.decode("utf-8") if data else ""
def get_chat_data(self, seq: int, limit: int = 1000, timeout_sec: int = 5,
proxy: bytes | None = None, passwd: bytes | None = None) -> dict:
sp = self.new_slice()
try:
ret = self._dll.GetChatData(
self.sdk,
c_ulonglong(seq),
c_uint(limit),
proxy, # None 会变成 NULL
passwd, # None 会变成 NULL
c_uint(timeout_sec),
sp
)
if ret != 0:
raise RuntimeError(f"GetChatData 失败,错误码: {ret}")
raw = self.slice_to_str(sp)
return json.loads(raw) if raw else {}
finally:
self.free_slice(sp)
def decrypt_data(self, decrypt_key_utf8: bytes, encrypt_chat_msg_b64: bytes) -> dict:
"""
用 SDK 解密 encrypt_chat_msg必须传 UTF-8 的 decrypt_key 文本 & Base64 的密文)
"""
sp = self.new_slice()
try:
ret = self._dll.DecryptData(decrypt_key_utf8, encrypt_chat_msg_b64, sp)
if ret != 0:
raise RuntimeError(f"DecryptData 失败,错误码: {ret}")
raw = self.slice_to_str(sp)
return json.loads(raw) if raw else {}
finally:
self.free_slice(sp)
class WeChatWorkHandler:
def __init__(self, sdk, wx_work):
self.sdk = sdk
self.wx_work = wx_work
from datetime import datetime
def _get_datetime_str(self, timestamp):
# timestamp 是13位整数, 由于精确到秒, 所以只需要10位除以1000
new_timestamp = timestamp
if (9999999999 < new_timestamp):
new_timestamp = timestamp / 1000
# 计算
localtime = time.localtime(new_timestamp)
datetime_str = time.strftime("%Y-%m-%d %H:%M:%S", localtime)
return datetime_str
def _get_media_file(self, sdkfileid, save_filename):
# 初始化媒体文件拉取的buf
# 媒体文件每次拉取的最大size为512k因此超过512k的文件需要分片拉取。若该文件未拉取完整sdk的IsMediaDataFinish接口会返回0同时通过GetOutIndexBuf接口返回下次拉取需要传入GetMediaData的indexbuf。
# indexbuf一般格式如右侧所示”Range: bytes = 524288 - 1048575“表示这次拉取的是从524288到1048575的分片。单个文件首次拉取填写的indexbuf为空字符串拉取后续分片时直接填入上次返回的indexbuf即可。
indexbuf = ""
while True:
media_data = self.wx_work.NewMediaData()
ret = self.wx_work.GetMediaData(self.sdk, indexbuf, sdkfileid, "", "", 5, media_data)
if (0 != ret):
print("-- GetMediaData --> 获取失败:", ret)
self.wx_work.FreeMediaData(media_data)
break
# 用msgid作为文件名这里根据需求制定文件名
# save_filename = os.getcwd() + "/images/" + result["msgid"] + file_ext
# ab为二进制文件追加读写用于分片写入不会覆盖前面已经写入的
with open(save_filename, 'ab') as f:
# 这里注意ctypes.string_at的参数addresssize,size就是分片长度得到 bytes
res = ctypes.string_at(self.wx_work.GetData(media_data), self.wx_work.GetDataLen(media_data))
f.write(res)
# 拉取完最后一个分片
if (1 == self.wx_work.IsMediaDataFinish(media_data)):
self.wx_work.FreeMediaData(media_data)
break
else:
# 获取下一个分片的indexbuf
indexbuf = self.wx_work.GetOutIndexBuf(media_data)
# 返回
return True
def _handle_sync_msg_file(self, seqNo, msgId, msgTime, msgFrom, msgType, msgObj, isSaveDatabase=False):
title = ""
file_ext = ""
file_type = ""
file_size = 0
file_content = None
sdkfileid = msgObj.get("sdkfileid")
datetime_str = self._get_datetime_str(msgTime)
if ("video" == msgType):
file_ext = ".mp4"
title = "视频_" + datetime_str.replace(" ", "").replace(":", "") + file_ext
elif ("voice" == msgType):
file_ext = ".amr"
title = "语音_" + datetime_str.replace(" ", "").replace(":", "") + file_ext
elif ("image" == msgType):
file_ext = ".png"
title = "照片_" + datetime_str.replace(" ", "").replace(":", "") + file_ext
elif ("file" == msgType):
file_ext = "." + msgObj.get("fileext")
title = msgObj.get("filename")
# 拉取媒体文件
(real_save_path, url_save_path) = gen_real_file_path()
real_save_path = real_save_path + file_ext
url_save_path = url_save_path + file_ext
self._get_media_file(sdkfileid, real_save_path)
file_size = os.path.getsize(real_save_path)
# 获取类型
file_type = get_document_file_type(url_save_path)
if ("md" == file_type):
file_content = get_text_content(real_save_path)
elif ("txt" == file_type):
file_content = get_text_content(real_save_path)
elif ("code" == file_type):
file_content = get_text_content(real_save_path)
# # 保存到数据库
# if isSaveDatabase:
# self._save_database(title, file_type, url_save_path, None, file_content, file_size, file_ext, msgFrom, datetime_str)
# 返回
resData = {
"title": title,
"file_type": file_type,
"url_save_path": url_save_path,
"real_save_path": real_save_path,
"file_content": file_content,
"file_size": file_size,
"file_ext": file_ext,
"datetime_str": datetime_str,
}
return resData
# 处理文本类型
def _handle_sync_msg_text(self, seqNo, msgId, msgTime, msgFrom, msgType, msgObj, isSaveDatabase=False):
content = msgObj.get("content")
datetime_str = self._get_datetime_str(msgTime)
print(f"-- {msgType} --> msgFrom={msgFrom}, content={content}")
# 调用 云文档 --> 创建 markdown 文件
# if isSaveDatabase:
# self._save_database(content[:50], "md", None, None, content, len(content), ".md", msgFrom, datetime_str)
# 返回
resData = {
"content": content,
"datetime_str": datetime_str,
}
return resData
# 处理链接
def _handle_sync_msg_link(self, seqNo, msgId, msgTime, msgFrom, msgType, msgObj, isSaveDatabase=False):
file_content = ""
title = msgObj.get("title")
description = msgObj.get("description")
link_url = msgObj.get("link_url")
image_url = msgObj.get("image_url")
datetime_str = self._get_datetime_str(msgTime)
print(f"-- {msgType} --> title={title}, description={description}, link_url={link_url}, image_url={image_url}")
if (description and (0<len(description))):
file_content = "> 摘要描述\n> " + description + "\n\n---\n\n"
# 根据 link_url 爬取文章内容
(md_title, md_content) = url_to_markdown(link_url)
file_content = file_content + md_content
# # 保存到数据库
# if isSaveDatabase:
# self._save_database(title, "url", None, link_url, file_content, len(file_content), ".url", msgFrom, datetime_str)
# 返回
resData = {
"title": title,
"description": description,
"link_url": link_url,
"image_url": image_url,
"file_content": file_content,
"datetime_str": datetime_str,
}
return resData
# 处理位置
def _handle_sync_msg_location(self, seqNo, msgId, msgTime, msgFrom, msgType, msgObj, isSaveDatabase=False):
title = msgObj.get("title")
address = msgObj.get("address")
longitude = msgObj.get("longitude")
latitude = msgObj.get("latitude")
zoom = msgObj.get("zoom")
datetime_str = self._get_datetime_str(msgTime)
print(f"-- {msgType} --> title={title}, address={address}, longitude={longitude}, latitude={latitude}, zoom={zoom}")
file_content = f"{title}\n地址:{address}\n精度:{longitude}\n维度:{latitude}"
# # 保存到数据库
# if isSaveDatabase:
# self._save_database(title, "md", None, None, file_content, len(file_content), ".md", msgFrom, datetime_str)
# 返回
resData = {
"title": title,
"address": address,
"longitude": longitude,
"latitude": latitude,
"file_content": file_content,
"datetime_str": datetime_str,
}
return resData
# 处理消息记录
def _handle_sync_msg_mix(self, seqNo, msgId, msgTime, msgFrom, msgType, msgObj, isSaveDatabase=False):
datetime_str = self._get_datetime_str(msgTime)
title = msgObj.get("title")
items = msgObj.get("item")
file_content = "#### " + title
for item in items:
item_type = item.get("type")
item_msgtime = item.get("msgtime")
item_content = item.get("content")
item_datetime_str = self._get_datetime_str(item_msgtime)
if (item_content and (0 < len(item_content))):
item_content_obj = json.loads(item_content)
if ("ChatRecordText" == item_type):
# 处理文本类型
resData = self._handle_sync_msg_text(None, None, item_msgtime, None, "text", item_content_obj, False)
tmp_content = resData.get("content")
elif ("ChatRecordImage" == item_type):
# 处理文件
resData = self._handle_sync_msg_file(None, None, item_msgtime, None, "image", item_content_obj, False)
tmp_content = f'![{resData.get("title")}]({resData.get("url_save_path")})'
elif ("ChatRecordVideo" == item_type):
# 处理文件
resData = self._handle_sync_msg_file(None, None, item_msgtime, None, "video", item_content_obj, False)
# tmp_content = resData.get("title") + "\n视频链接" + resData.get("url_save_path")
tmp_content = f'!video[{resData.get("title")}]({resData.get("url_save_path")})'
elif ("ChatRecordFile" == item_type):
# 处理文件
resData = self._handle_sync_msg_file(None, None, item_msgtime, None, "file", item_content_obj, False)
tmp_content = resData.get("title") + "\n文件链接:" + resData.get("url_save_path")
elif ("ChatRecordLink" == item_type):
# 处理链接
resData = self._handle_sync_msg_link(None, None, item_msgtime, None, "link", item_content_obj, False)
tmp_content = resData.get("title") + "\n原文链接:" + resData.get("link_url")
elif ("ChatRecordLocation" == item_type):
# 处理位置
resData = self._handle_sync_msg_location(None, None, item_msgtime, None, "location", item_content_obj, False)
tmp_content = resData.get("file_content")
elif ("ChatRecordMixed" == item_type):
# 处理消息记录
resData = self._handle_sync_msg_mix(None, None, item_msgtime, None, None, item_content_obj, False)
tmp_content = "---\n\n" + resData.get("file_content") + "\n\n---"
else:
tmp_content = "[该消息类型暂不能展示]"
# 构造内容
file_content = file_content + "\n\n" + "> " + item_datetime_str + "\n" + tmp_content
# 显示
print(f"----> item_type={item_type}, item_msgtime={item_msgtime}, item_content={item_content}")
print(f"----> file_content={file_content}")
# 保存到数据库
# if isSaveDatabase:
# self._save_database(title, "md", None, None, file_content, len(file_content), ".md", msgFrom, datetime_str)
# 返回
resData = {
"title": title,
"file_content": file_content,
"datetime_str": datetime_str,
}
return resData
def sync_msg(self):
private_key = RSA.importKey(self.PRI_KEY)
cipher = PKCS1_v1_5.new(private_key)
slice_ptr = self.wx_work.NewSlice()
print("-------------------------------",self.seq)
ret = self.wx_work.GetChatData(self.sdk, self.seq, 1000, None, None, 5, slice_ptr)
if ret != 0:
print("GetChatData 错误:", ret)
return
origin_data = self.wx_work.GetContentFromSlice(slice_ptr)
self.wx_work.FreeSlice(slice_ptr)
chat_data = json.loads(origin_data).get("chatdata", [])
if not chat_data:
print("没有新消息")
return
for msg in chat_data:
try:
# RSA 解密随机密钥
encrypt_key_bytes = cipher.decrypt(
base64.b64decode(msg.get("encrypt_random_key")), None
)
encrypt_key_str = encrypt_key_bytes.decode("utf-8")
# 解密消息
slice_ptr = self.wx_work.NewSlice()
decrypt_ret = self.wx_work.DecryptData(
encrypt_key_str.encode("utf-8"),
msg.get("encrypt_chat_msg").encode("utf-8"),
slice_ptr
)
if decrypt_ret != 0:
print(f"DecryptData 错误: {decrypt_ret}, msgid={msg.get('msgid')}")
continue
result_json = self.wx_work.GetContentFromSlice(slice_ptr)
self.wx_work.FreeSlice(slice_ptr)
result = json.loads(result_json)
# 更新 seq
self.seq = msg.get("seq", self.seq)
seqNo = msg.get("seq")
msgId = result.get("msgid")
msgTime = result.get("msgtime")
msgFrom = result.get("from")
msgType = result.get("msgtype")
msgObj = result.get(msgType)
# if (("video" == msgType) or ("voice" == msgType) or ("image" == msgType) or ("file" == msgType)):
# # 处理图片、音频、视频、文件
# self._handle_sync_msg_file(seqNo, msgId, msgTime, msgFrom, msgType, msgObj, True)
print(seqNo)
if ("text" == msgType):
self._handle_sync_msg_text(seqNo, msgId, msgTime, msgFrom, msgType, msgObj, True)
elif ("link" == msgType):
self._handle_sync_msg_link(seqNo, msgId, msgTime, msgFrom, msgType, msgObj, True)
elif ("location" == msgType):
self._handle_sync_msg_location(seqNo, msgId, msgTime, msgFrom, msgType, msgObj, True)
elif ("chatrecord" == msgType):
self._handle_sync_msg_mix(seqNo, msgId, msgTime, msgFrom, msgType, msgObj, True)
except Exception as e:
print("处理消息异常:", e, "msg=", msg)
# 释放 SDK
self.wx_work.DestroySdk(self.sdk)
# ====== 使用示例 ======
if __name__ == "__main__":
DLL_PATH = r"D:\pyproject\box\qywx-sdk\windows\C_sdk\FinanceSdkDemo\WeWorkFinanceSdk.dll"
CORP_ID = b"wwed4ac0dd4e1d398d"
SECRET = b"ZPZPpW0tMaIRxhAcywO_lW21ACDUiEBkxJwgrRFRdfg"
# 1⃣ 初始化 SDK
sdk_obj = WeWorkFinanceSDK(DLL_PATH)
sdk_obj.init(CORP_ID, SECRET)
# 2⃣ 加载 RSA 私钥
with open("private.pem", "rb") as f:
PRI_KEY = f.read()
# 3⃣ 初始化 Handler
wx_handler = WeChatWorkHandler(sdk_obj.sdk, sdk_obj._dll)
wx_handler.PRI_KEY = PRI_KEY
# 4⃣ 设置起始 seq
wx_handler.seq = 0
# 5⃣ 同步消息
wx_handler.sync_msg()