# 2025-09-25 11:42:04 +00:00
import asyncio
import aiohttp
import json
import sys
import os
sys . path . append ( os . path . dirname ( os . path . dirname ( os . path . abspath ( __file__ ) ) ) )
from config import api_info
import asyncio
import aiohttp
import json
from config import api_info
from collections import defaultdict
# ======================
# 调用大模型 API
# ======================
async def call_model_api(prompt):
    """Asynchronously call the Nuwa Chat Completions API and return the text output.

    Parameters
    ----------
    prompt : str
        The user prompt, sent as a single chat message.

    Returns
    -------
    str
        The model's reply text, or "" on any HTTP or network failure
        (errors are printed, never raised, so callers can treat the
        result as best-effort).
    """
    url = f"{api_info['base_url']}/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_info['api_key']}",
    }
    payload = {
        "model": api_info["model"],
        "messages": [{"role": "user", "content": prompt}],
        # Disable the model's "thinking" mode so only the final answer is returned.
        "thinking": {"type": "disabled"},
        "max_output_tokens": 1024,
    }
    async with aiohttp.ClientSession() as session:
        try:
            async with session.post(url, headers=headers, json=payload, timeout=60) as resp:
                if resp.status == 200:
                    result = await resp.json()
                    # OpenAI-compatible shape: choices[0].message.content
                    return result.get("choices", [{}])[0].get("message", {}).get("content", "")
                print(f"[ERROR] 请求失败: {resp.status} {await resp.text()}")
                return ""
        except Exception as e:
            # Timeouts / connection errors degrade to an empty reply.
            print(f"[ERROR] 请求异常: {e}")
            return ""
# ======================
# 异步解析每篇论文
# ======================
async def parse_paper(paper):
    """Ask the LLM to distill one paper's metadata into a structured summary.

    Parameters
    ----------
    paper : dict
        Raw paper record; reads "title" (falling back to "Conference"),
        "summary", and "keywords".

    Returns
    -------
    dict
        A copy of *paper* with an added "parsed_summary" dict containing
        the keys background / objective / method / results / contribution.
        Fields default to "" when the model output is missing or not
        valid JSON.
    """
    title = paper.get("title") or paper.get("Conference", "")
    summary = paper.get("summary", "")
    keywords = paper.get("keywords", [])
    # Full prompt specification: JSON-only output, fixed field names,
    # with a one-shot example to anchor the expected format.
    model_prompt = f"""
你是一个科研助手，请根据以下信息分析论文内容，并提炼关键信息总结成 JSON 格式。要求：
1. 输出 JSON 格式，字段包含：
   - background: 论文背景，简明说明研究动机和问题，不抄原文摘要。
   - objective: 研究目标，逻辑上支撑方法和贡献，如果有多个目标，每条编号从 1 开始，如 "1. …", "2. …"。
   - method: 研究方法，说明论文如何实现目标，逻辑上与目标和贡献连贯，如果有多条方法，每条编号从 1 开始。
   - results: 核心结论，概括论文主要结果。
   - contribution: 论文贡献总结，总结通过方法解决目标得到的价值与创新点，如果有多条贡献，每条编号从 1 开始。
2. **要求分析提炼，而非复述原文摘要**：
   - 用你自己的理解重组信息
   - 确保逻辑顺序：objective → method → contribution
   - 精炼、一针见血，但保持完整信息
3. 如果某一项无法从信息中提取，请置空 ""。
4. 输出 JSON 时严格遵循字段名称，不添加额外解释文字。
示例输入：
Title: Analyzing the Basic Elements of Mobile Viral Marketing - An Empirical Study
Summary: As personal communication tools mobile devices are platforms for word-of-mouth marketing. Given the assigned usefulness of mobile viral marketing, it is surprising to find relatively few studies directed at its basic elements, i.e., mobile viral content and consumers forwarding this content. The paper presents the findings of an online survey conducted to empirically investigate the consumers' intention to participate in different kinds of mobile viral marketing strategies and to identify the characteristics of mobile viral mavens in terms of their forwarding behaviour.
Keywords: mobile marketing, viral marketing, consumer behavior
示例输出：
{{
  "background": "移动设备为口碑传播提供了新渠道，但关于病毒营销基本元素的研究仍较少。",
  "objective": "1. 分析移动病毒营销的核心组成及消费者转发行为，理解不同策略对参与意向的影响。",
  "method": "2. 设计并实施在线问卷调查，收集消费者行为数据，并进行实证分析以验证策略效果。",
  "results": "发现消费者对不同类型的移动病毒营销策略表现出不同的参与意向。",
  "contribution": "3. 提炼移动病毒营销的关键元素及转发行为模式，为营销策略优化提供参考。"
}}
现在请根据以下信息生成 JSON：
Title: {title}
Summary: {summary}
Keywords: {', '.join(keywords)}
"""
    # One empty skeleton reused for every failure path (no duplication).
    empty = {
        "background": "",
        "objective": "",
        "method": "",
        "results": "",
        "contribution": "",
    }
    try:
        model_output = await call_model_api(model_prompt)
        # Models frequently wrap JSON in ```json ... ``` fences; strip them
        # before parsing so valid payloads are not rejected.
        cleaned = (model_output or "").strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`")
            if cleaned.lower().startswith("json"):
                cleaned = cleaned[4:]
            cleaned = cleaned.strip()
        parsed = json.loads(cleaned) if cleaned else empty
    except Exception:
        # Invalid JSON or API failure: fall back to the empty skeleton.
        parsed = empty
    paper_parsed = paper.copy()
    paper_parsed["parsed_summary"] = parsed
    return paper_parsed
async def parse_ieee_results_all_categories_async(json_data):
    """Parse every category of scraped results, deduplicating by title.

    Each unique title is sent to the LLM exactly once; repeated titles
    (within or across categories) reuse the cached parse result.

    Parameters
    ----------
    json_data : dict
        Either {"results": {category: [paper, ...]}} or the inner
        {category: [paper, ...]} mapping directly.

    Returns
    -------
    dict
        {category: [parsed_paper, ...]} preserving each category's
        paper order (duplicates included, pointing at the shared
        cached parse).
    """
    # Accept both wrapper formats; equivalent to the original
    # `"results" in json_data` conditional.
    results = json_data.get("results", json_data)
    parsed_results = defaultdict(list)
    cache = {}  # title -> parsed paper, shared across all categories

    for category, papers in results.items():
        order = []          # titles in this category, in original order
        pending = {}        # title -> task, only for not-yet-parsed titles
        for paper in papers:
            title = paper.get("title") or paper.get("Conference", "")
            order.append(title)
            # NOTE: the original looked up cache[title] for any repeated
            # title immediately, which raised KeyError for duplicates
            # within the same (not yet gathered) category. Scheduling at
            # most one task per unseen title fixes that.
            if title not in cache and title not in pending:
                pending[title] = asyncio.create_task(parse_paper(paper))
        if pending:
            done = await asyncio.gather(*pending.values())
            for t, parsed_paper in zip(pending, done):
                cache[t] = parsed_paper
        # Rebuild the category list from the cache, one entry per
        # original occurrence.
        for title in order:
            parsed_results[category].append(cache[title])
    return dict(parsed_results)