commit 05425ba9bf (parent f96a95fd18)
docker-compose.yml
@@ -14,7 +14,10 @@ services:
     container_name: selenium-crawl-container
     ports:
      - "5001:5000"
-
+    environment:
+      API_MODEL: "glm-4.5"
+      API_BASE_URL: "https://open.bigmodel.cn/api/paas/v4"
+      API_KEY: "ce39bdd4fcf34ec0aec75072bc9ff988.hAp7HZTVUwy7vImn"
 # ---------- Django + Celery ----------
   selenium_django:
     build: ./selenium_django
@@ -24,7 +27,11 @@ services:
       CELERY_BROKER_URL: redis://redis:6379/0
       CELERY_RESULT_BACKEND: redis://redis:6379/0
       # Address Django uses to call the crawler service
-      CRAWL_API_URL: http://47.83.141.164:5001/crawl
+      CRAWL_API_URL: http://47.83.141.164:5001
+      # API configuration
+      API_MODEL: "glm-4-long"
+      API_BASE_URL: "https://open.bigmodel.cn/api/paas/v4"
+      API_KEY: "ce39bdd4fcf34ec0aec75072bc9ff988.hAp7HZTVUwy7vImn"
     volumes:
       - "./selenium_django:/app"
     depends_on:
@@ -39,8 +46,8 @@ services:
       context: ./selenium_vue  # one directory up
       dockerfile: Dockerfile
       args:
-        VITE_API_BASE_URL: http://47.83.141.164:8002
-        VITE_CRAWL_URL: http://47.83.141.164:5001/crawl
+        VITE_API_BASE_URL: http://47.83.141.164:8002  # switched to the remote IP
+        VITE_CRAWL_URL: http://47.83.141.164:5001     # switched to the remote IP
     container_name: selenium-vue-container
     environment:
       PORT: 80
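The compose change injects the model credentials per service instead of baking them into the images. A minimal sketch of how a container is expected to pick these up at runtime, matching the os.environ.get pattern the Python hunks below adopt (variable names taken from the environment: blocks above):

    import os

    # Values come from the service's `environment:` block; the second
    # argument is the fallback used when the variable is unset.
    API_MODEL = os.environ.get("API_MODEL", "glm-4.5")
    API_BASE_URL = os.environ.get("API_BASE_URL", "https://open.bigmodel.cn/api/paas/v4")
    API_KEY = os.environ.get("API_KEY", "")  # secrets get no safe default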
crawler service module (Python; filename not shown)
@@ -4,9 +4,9 @@ from selenium import webdriver
 from selenium.webdriver.chrome.service import Service
 import os
 api_info = {
-    "model": "glm-4.5",
-    "base_url": "https://open.bigmodel.cn/api/paas/v4",
-    "api_key": "ce39bdd4fcf34ec0aec75072bc9ff988.hAp7HZTVUwy7vImn"
+    "model": os.environ.get("API_MODEL", "glm-4.5"),  # default value is optional
+    "base_url": os.environ.get("API_BASE_URL", "https://open.bigmodel.cn/api/paas/v4"),
+    "api_key": os.environ.get("API_KEY", ""),
 }

 # Chrome browser and driver configuration
@@ -39,26 +39,31 @@ def translate_text(text):
         return {"chinese": [], "english": []}

     # Build the prompt
-    prompt = (
-        "你是科研助手,输入是一句话或中文关键词列表。"
-        "请从输入中理解语义,提取与科研论文主题最相关、最核心的中文主题,并翻译为英文。"
-        "只保留1~2个最核心主题,不要加入无关内容。"
-        "输出必须严格遵守 JSON 格式,不允许有额外文字或符号:{\"chinese\": [...], \"english\": [...]}。\n"
-        "示例输入输出:\n"
-        "输入: '我想获取基于深度学习的图像识别方面的研究'\n"
-        "输出: {\"chinese\": [\"基于深度学习的图像识别\"], \"english\": [\"Deep Learning-based Image Recognition\"]}\n"
-        "输入: '图像识别在深度学习方面的研究'\n"
-        "输出: {\"chinese\": [\"基于深度学习的图像识别\"], \"english\": [\"Deep Learning-based Image Recognition\"]}\n"
-        "输入: '自然语言处理模型在文本分类中的应用'\n"
-        "输出: {\"chinese\": [\"自然语言处理文本分类\"], \"english\": [\"NLP Text Classification\"]}\n"
-        "输入: '强化学习在自动驾驶决策中的最新进展'\n"
-        "输出: {\"chinese\": [\"强化学习自动驾驶决策\"], \"english\": [\"Reinforcement Learning for Autonomous Driving Decision-Making\"]}\n"
-        "输入: '使用图神经网络进行社交网络分析的研究'\n"
-        "输出: {\"chinese\": [\"图神经网络社交网络分析\"], \"english\": [\"Graph Neural Networks for Social Network Analysis\"]}\n"
-        "输入: '我想研究深度强化学习在机器人控制中的应用'\n"
-        "输出: {\"chinese\": [\"深度强化学习机器人控制\"], \"english\": [\"Deep Reinforcement Learning for Robot Control\"]}\n"
-        f"现在请对输入提取核心主题:\n输入: {text}"
-    )
+    system_prompt = """你是一名科研检索关键词提炼专家,任务是将用户输入的自然语言直接提炼为学术检索关键词。
+【要求】
+1. 提炼输入中的核心研究对象、问题、方法或应用场景。
+2. 用学术化中文表达,避免口语化或宽泛词汇。
+3. 给出对应英文表达,使用国际学术界常用专业术语。
+4. 如果输入包含多个研究问题,请分别提炼关键词,每个字段最多 3 个关键词。
+5. 删除无关修饰词或无检索价值的词。
+6. 输出严格 JSON 格式,仅包含 `chinese` 和 `english` 字段,值为列表。
+
+【示例】
+输入: '我想研究深度强化学习在机器人控制中的应用'
+输出: {"chinese": ["深度强化学习", "机器人控制"], "english": ["Deep Reinforcement Learning", "Robot Control"]}
+
+输入: '大模型多轮对话迷失的问题及解决方案'
+输出: {"chinese": ["大型语言模型", "多轮对话上下文漂移"], "english": ["Large Language Models", "Context Drift in Multi-turn Dialogue"]}
+
+输入: '人工智能幻觉问题及多轮对话迷失的解决方法,包括意图识别工作'
+输出: {"chinese": ["人工智能幻觉", "多轮对话上下文漂移", "意图识别"], "english": ["AI Hallucination", "Context Drift in Multi-turn Dialogue", "Intent Recognition"]}
+
+输入: '了解生态系统的能量流动和物种多样性'
+输出: {"chinese": ["生态系统能量流动", "物种多样性"], "english": ["Ecosystem Energy Flow", "Species Diversity"]}
+"""
+    user_prompt = f"""输入:{text}
+请严格输出符合 JSON 格式的核心科研关键词:
+"""

     url = f"{api_info['base_url']}/chat/completions"
@@ -68,12 +73,21 @@ def translate_text(text):
     }
     payload = {
         "model": api_info["model"],
-        "messages": [{"role": "user", "content": prompt}],
+        "messages": [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt}
+        ],
+        "thinking": {
+            "type": "disabled"
+        },
         "max_output_tokens": 512
     }

     try:
-        resp = requests.post(url, headers=headers, json=payload, timeout=30)
+        resp = requests.post(url, headers=headers, json=payload, timeout=60)
         resp.raise_for_status()
         result = resp.json()
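The hunk stops right after result = resp.json(); the step that turns the model reply into the {"chinese": [...], "english": [...]} dict is not shown. A minimal sketch of that step, assuming the endpoint returns the usual OpenAI-compatible chat shape (choices[0].message.content) and that the model obeys the strict-JSON instruction; the fallback mirrors the function's empty default above:

    import json

    def parse_keywords(result: dict) -> dict:
        """Pull the strict-JSON keyword payload out of a chat/completions reply."""
        try:
            content = result["choices"][0]["message"]["content"]
            data = json.loads(content)
            if isinstance(data, dict) and "chinese" in data and "english" in data:
                return data
        except (KeyError, IndexError, TypeError, ValueError):
            pass  # malformed reply; fall through to the empty default
        return {"chinese": [], "english": []}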
model API helper (async; filename not shown)
@@ -25,6 +25,9 @@ async def call_model_api(prompt):
     payload = {
         "model": api_info["model"],
         "messages": [{"role": "user", "content": prompt}],
+        "thinking": {
+            "type": "disabled"
+        },
         "max_output_tokens": 1024
     }
selenium_django Dockerfile
@@ -20,11 +20,6 @@ ENV CELERY_BROKER_URL=${CELERY_BROKER_URL:-redis://redis:6379/0}
 ENV CELERY_RESULT_BACKEND=${CELERY_RESULT_BACKEND:-redis://redis:6379/0}
 ENV CRAWL_API_URL=${CRAWL_API_URL:-http://47.83.141.164:5001/crawl}

-# Replace the settings.py configuration at build time
-RUN sed -i "s#CELERY_BROKER_URL = .*#CELERY_BROKER_URL = '${CELERY_BROKER_URL}'#" selenium_django/settings.py && \
-    sed -i "s#CELERY_RESULT_BACKEND = .*#CELERY_RESULT_BACKEND = '${CELERY_RESULT_BACKEND}'#" selenium_django/settings.py && \
-    sed -i "s#CRAWL_API_URL = .*#CRAWL_API_URL = '${CRAWL_API_URL}'#" selenium_django/settings.py

 # Entrypoint script
 COPY entrypoint.sh /entrypoint.sh
 RUN chmod +x /entrypoint.sh
Django models.py
@@ -3,14 +3,16 @@ from django.db import models
 # Create your models here.
 from django.db import models


 class Task(models.Model):
     TASK_STATUS_CHOICES = [
         ('running', '进行中'),
+        ('queued', '进行中'),
         ('idle', '空闲中'),
         ('done', '完成'),
         ('failed', '失败'),
     ]

     EXECUTION_TYPE_CHOICES = [
         ('scheduled', '定期执行'),
         ('predefined', '预定时间执行'),
@@ -53,3 +55,14 @@ class TaskDetail(models.Model):
     parsed_summary = models.JSONField(blank=True, null=True)  # stores JSON
     title = models.CharField(max_length=300, blank=True)
     created_at = models.DateTimeField(auto_now_add=True)

+
+class CrawlQueue(models.Model):
+    task = models.ForeignKey(Task, on_delete=models.CASCADE, related_name="queue")
+    texts = models.TextField()
+    parse_flag = models.BooleanField(default=True)
+    limit = models.IntegerField(default=10)
+    sort_options = models.JSONField(default=list)
+    status = models.CharField(max_length=20, default="pending")  # pending / processing / done / failed
+    created_at = models.DateTimeField(auto_now_add=True)
+    updated_at = models.DateTimeField(auto_now=True)
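CrawlQueue serializes crawl requests through the database: producers insert pending rows, and a single consumer claims the oldest one at a time. A sketch of the claim step the worker below relies on (the import path is hypothetical; select_for_update(skip_locked=True) needs a backend with row locks such as PostgreSQL or MySQL, and SQLite will raise NotSupportedError):

    from django.db import transaction
    from tasks_app.models import CrawlQueue  # hypothetical app path

    def claim_next_item():
        """Atomically claim the oldest pending crawl, skipping rows other workers hold."""
        with transaction.atomic():
            item = (
                CrawlQueue.objects
                .select_for_update(skip_locked=True)
                .filter(status="pending")
                .order_by("created_at")
                .first()
            )
            if item:
                item.status = "processing"
                item.save(update_fields=["status"])
            return item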
APScheduler scheduler module (filename not shown)
@@ -2,7 +2,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
 from apscheduler.triggers.date import DateTrigger
 from django.utils import timezone
 from datetime import datetime, date
-from .models import Task
+from .models import Task, CrawlQueue
 from .tasks import trigger_task_execution
 import logging
 logger = logging.getLogger(__name__)
@@ -25,9 +25,9 @@ def start_scheduler():
     scheduler.add_job(sync_scheduled_tasks, 'interval', seconds=30)

 def check_predefined_tasks():
-    """Check one-off tasks and trigger async Celery execution"""
+    """Check one-off tasks and enqueue them"""
     logger.info("检查一次性任务: 开始")
-    now = datetime.now()  # use local time
+    now = datetime.now()
     tasks = Task.objects.filter(status='idle', execution_type='predefined')
     logger.debug(f"[Predefined] 检查 {len(tasks)} 个一次性任务, 当前时间 {now}")

@@ -37,24 +37,34 @@ def check_predefined_tasks():
             logger.warning(f"Task {task.id} 没有设置 execution_time,跳过")
             continue

-        # The database already stores local time; no need to make it timezone-aware
         if exec_time <= now:
             try:
-                # Call Celery asynchronously, passing only task.id
-                trigger_task_execution.delay(task.id)
-                logger.info(f"Task {task.id} 已触发 Celery 异步执行")
-
-                # Set the status to done to avoid re-triggering
-                task.status = 'done'
+                # Queueing logic
+                task.status = 'running'  # shown as "in progress" on the frontend
                 task.save(update_fields=['status'])

+                CrawlQueue.objects.create(
+                    task=task,
+                    texts=task.description,
+                    parse_flag=task.parse_flag,
+                    limit=task.limit,
+                    sort_options=[],
+                    status="pending"
+                )
+                process_crawl_queue.delay()
+
+                logger.info(f"Task {task.id} 已加入队列")
             except Exception as e:
                 logger.error(f"触发 Task {task.id} 时出错: {e}")

 def sync_scheduled_tasks():
     """Sync daily scheduled tasks into APScheduler"""
     today = date.today()
-    now = datetime.now()  # local time
+    now = datetime.now()
     tasks = Task.objects.filter(status='idle', execution_type='scheduled')
     logger.debug(f"[Scheduled] 检查 {len(tasks)} 个每日任务, 当前时间 {now}")

@@ -71,10 +81,9 @@ def sync_scheduled_tasks():
         last_run = task.last_run_date
         if last_run != today:
-            # Use local time directly; no more make_aware
             exec_datetime = datetime.combine(today, scheduled_time_obj)

             job_id = f"scheduled_task_{task.id}"

             if not scheduler.get_job(job_id):
                 scheduler.add_job(
                     run_scheduled_task,
@@ -86,7 +95,6 @@ def sync_scheduled_tasks():
                 )

-
 def run_scheduled_task(task_id):
     """Run a daily scheduled task"""
     try:
@@ -96,9 +104,23 @@ def run_scheduled_task(task_id):
         return

     try:
-        trigger_task_execution.delay(task.id)
-        logger.info(f"[Scheduled] Task {task.id} 已触发 Celery 执行")
+        # Queueing logic
+        task.status = 'running'
+        task.save(update_fields=['status'])
+
+        CrawlQueue.objects.create(
+            task=task,
+            texts=task.description,
+            parse_flag=task.parse_flag,
+            limit=task.limit,
+            sort_options=[],
+            status="pending"
+        )
+        process_crawl_queue.delay()
+
+        logger.info(f"[Scheduled] Task {task.id} 已加入队列")
         task.last_run_date = date.today()
         task.save(update_fields=['last_run_date'])
     except Exception as e:
         logger.error(f"[Scheduled] 执行 Task {task.id} 出错: {e}")
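Both branches above now call process_crawl_queue.delay(), but the import hunk only adds CrawlQueue, so as shown the name would be undefined at runtime. For the queueing to work, the task presumably needs importing alongside the existing one:

    from .tasks import trigger_task_execution, process_crawl_queue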
Celery tasks.py
@@ -2,9 +2,9 @@
 import requests
 from datetime import datetime, date

-from django.db import transaction
+from django.db import transaction, DatabaseError

-from .models import Task, TaskDetail
+from .models import Task, TaskDetail, CrawlQueue
 from django.utils import timezone
 import threading
 import time
@@ -18,123 +18,135 @@ def safe_dict_get(d, key, default=None):
         return d.get(key, default)
     return default

-@shared_task(bind=True, max_retries=3, default_retry_delay=60)
+@shared_task(bind=True, queue='crawler', max_retries=3, default_retry_delay=60)
 def trigger_task_execution(self, task_id):
-    """Execute a single task asynchronously"""
-    task = None
+    """Receive a task and enqueue it for processing"""
     try:
-        # Fetch the task
         task = Task.objects.get(id=task_id)
-        task.status = 'running'
+        # Mark as queued
+        task.status = 'queued'
         task.save(update_fields=['status'])
-        print(f"任务 {task_id} 状态更新为 running")
+        print(f"任务 {task_id} 状态更新为 queued")

-        # Crawler request
-        payload = {
-            "texts": task.description,
-            "parse": task.parse_flag,
-            "limit": task.limit
-        }
+        # Store the task in CrawlQueue
+        CrawlQueue.objects.create(
+            task=task,
+            texts=task.description,
+            parse_flag=task.parse_flag,
+            limit=task.limit,
+            sort_options=[],
+            status="pending",
+        )
+        print(f"任务 {task_id} 已加入爬虫队列")

-        try:
-            resp = requests.post(CRAWL_API_URL, json=payload, timeout=30000)
-            resp.raise_for_status()
-        except requests.RequestException as e:
-            print(f"Task {task_id} 爬虫请求失败: {e}")
-            raise self.retry(exc=e)
-
-        # Parse the JSON defensively
-        try:
-            data = resp.json()
-            if not isinstance(data, dict):
-                print(f"Task {task_id} 返回数据不是字典,用空 dict 代替")
-                data = {}
-        except ValueError:
-            print(f"Task {task_id} 返回非 JSON 数据: {resp.text[:200]}")
-            data = {}
-
-        # code==20000 means keyword extraction failed
-        if safe_dict_get(data, "code") == 20000:
-            print(f"Task {task_id} 爬虫返回 code=20000, message={data.get('message')}")
-            return {"success": False, "message": data.get("message", "提取不到关键词")}
-
-        # Save the task details
-        results = safe_dict_get(data, "results", [])
-        if not isinstance(results, list):
-            results = []
-
-        with transaction.atomic():
-            for idx, item in enumerate(results, start=1):
-                if not isinstance(item, dict):
-                    print(f"Task {task_id} results 第 {idx} 个元素不是字典,跳过")
-                    continue
-
-                download_val = item.get("download") or 0
-                try:
-                    download_val = int(download_val)
-                except (ValueError, TypeError):
-                    download_val = 0
-
-                date_val = str(item.get("date")) if item.get("date") else None
-
-                author_val = item.get("author")
-                if isinstance(author_val, list):
-                    author_val = ';'.join(author_val)
-                elif author_val is None:
-                    author_val = ''
-
-                keywords_val = item.get("keywords")
-                if isinstance(keywords_val, list):
-                    keywords_val = ';'.join(keywords_val)
-                else:
-                    keywords_val = ''
-
-                pdf_url = item.get("pdfUrl") or ''
-                parsed_summary = item.get("parsed_summary") or {}
-                quote_val = item.get("quote") or ''
-                site_val = item.get("site") or ''
-                source_val = item.get("source") or ''
-                summary_val = item.get("summary") or ''
-                title_val = item.get("title") or ''
-                original_link = item.get("originalLink") or ''
-
-                # Save each TaskDetail; one failure doesn't affect the rest
-                try:
-                    TaskDetail.objects.get_or_create(
-                        task=task,
-                        original_link=original_link,
-                        defaults={
-                            'author': author_val,
-                            'date': date_val,
-                            'download': download_val,
-                            'keywords': keywords_val,
-                            'pdf_url': pdf_url,
-                            'parsed_summary': parsed_summary,
-                            'quote': quote_val,
-                            'site': site_val,
-                            'source': source_val,
-                            'summary': summary_val,
-                            'title': title_val
-                        }
-                    )
-                    print(f"Task {task_id} 保存第 {idx} 条结果成功")
-                except Exception as e:
-                    print(f"Task {task_id} 保存第 {idx} 条结果失败: {e}")
-                    continue
-
-        # Mark the task done
-        task.status = 'done'
-        task.save(update_fields=['status'])
-        print(f"任务 {task_id} 执行完成")
+        # Immediately trigger queue processing
+        process_crawl_queue.delay()

     except Task.DoesNotExist:
         print(f"Task {task_id} 不存在")
     except Exception as e:
-        print(f"Task {task_id} 执行失败: {e}")
-        try:
-            if task:
-                task.status = 'failed'
-                task.save(update_fields=['status'])
-        except Exception as e2:
-            print(f"更新任务失败状态失败: {e2}")
-        raise self.retry(exc=e)
+        print(f"Task {task_id} 入队失败: {e}")
+        raise self.retry(exc=e)

+@shared_task(bind=True, queue='crawl_worker', max_retries=3, default_retry_delay=60)
+def process_crawl_queue(self):
+    """
+    Process queued tasks sequentially, ensuring one runs after another
+    """
+    item = None
+    try:
+        # Fetch the oldest pending item (locked to avoid concurrent pickup)
+        with transaction.atomic():
+            item = (
+                CrawlQueue.objects
+                .select_for_update(skip_locked=True)
+                .filter(status='pending')
+                .order_by('created_at')
+                .first()
+            )
+            if not item:
+                return "no task"
+
+            # Mark queue item and task status
+            item.status = 'processing'
+            item.save(update_fields=['status'])
+
+            task = item.task
+            task.status = 'running'
+            task.save(update_fields=['status'])
+
+        # Run the network request outside the transaction to keep locks short
+        payload = {
+            "texts": item.texts,
+            "parse": item.parse_flag,
+            "limit": item.limit,
+            "sort": item.sort_options
+        }
+        print(f"开始请求爬虫 task_id={task.id}")
+        resp = requests.post(CRAWL_API_URL, json=payload, timeout=300)
+        resp.raise_for_status()
+        try:
+            data = resp.json()
+        except ValueError:
+            print(f"Task {task.id} 返回非 JSON 数据: {resp.text[:200]}")
+            data = {}
+
+        results = data.get("results", [])
+        if not isinstance(results, list):
+            results = []
+
+        # Save results under transaction protection
+        with transaction.atomic():
+            for idx, r in enumerate(results, start=1):
+                TaskDetail.objects.get_or_create(
+                    task=task,
+                    original_link=r.get("originalLink") or "",
+                    defaults={
+                        "author": ";".join(r.get("author", [])) if isinstance(r.get("author"), list) else (r.get("author") or ""),
+                        "date": str(r.get("date")) if r.get("date") else None,
+                        "download": int(r.get("download") or 0),
+                        "keywords": ";".join(r.get("keywords", [])) if isinstance(r.get("keywords"), list) else (r.get("keywords") or ""),
+                        "pdf_url": r.get("pdfUrl") or "",
+                        "parsed_summary": r.get("parsed_summary") or {},
+                        "quote": r.get("quote") or "",
+                        "site": r.get("site") or "",
+                        "source": r.get("source") or "",
+                        "summary": r.get("summary") or "",
+                        "title": r.get("title") or "",
+                    }
+                )
+                print(f"Task {task.id} 保存第 {idx} 条结果成功")
+
+        # Mark as done
+        with transaction.atomic():
+            task.status = 'done'
+            task.save(update_fields=['status'])
+            item.status = 'done'
+            item.save(update_fields=['status'])
+        print(f"任务 {task.id} 执行完成")
+
+    except requests.RequestException as e:
+        print(f"网络请求失败 task_id={item.task.id if item else 'N/A'}: {e}")
+        if item:
+            with transaction.atomic():
+                item.status = 'pending'
+                item.save(update_fields=['status'])
+        raise self.retry(exc=e)
+
+    except DatabaseError as e:
+        print(f"数据库异常 task_id={item.task.id if item else 'N/A'}: {e}")
+        raise self.retry(exc=e)
+
+    except Exception as e:
+        print(f"任务执行失败 task_id={item.task.id if item else 'N/A'}: {e}")
+        if item:
+            with transaction.atomic():
+                task.status = 'failed'
+                task.save(update_fields=['status'])
+                item.status = 'failed'
+                item.save(update_fields=['status'])
+        raise self.retry(exc=e)
+
+    finally:
+        # Trigger the next queued item
+        process_crawl_queue.apply_async(countdown=1)
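Note that process_crawl_queue re-arms itself in finally, so even the return "no task" exit schedules another poll about a second later; once triggered, the task keeps checking the queue for as long as the worker runs. Strict one-at-a-time execution comes from pairing this with a dedicated single-process worker (the -Q crawl_worker -c 1 line in entrypoint.sh below). The pattern reduced to a skeleton, with claim_next_item and process as hypothetical stand-ins for the locked claim and the crawl shown above:

    from celery import shared_task

    @shared_task(bind=True, queue="crawl_worker")
    def drain_queue(self):
        try:
            item = claim_next_item()   # locked FIFO claim, as sketched earlier
            if item is None:
                return "no task"
            process(item)              # the long-running crawl + result saving
        finally:
            # Always re-arm: the queue is polled again about a second later,
            # and the single-concurrency worker guarantees runs never overlap.
            drain_queue.apply_async(countdown=1)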
DRF views.py
@@ -10,13 +10,21 @@ from django_filters.rest_framework import DjangoFilterBackend
 # Create your views here.
 from rest_framework import viewsets, filters
 from rest_framework.pagination import PageNumberPagination
-from .models import Task, TaskDetail
+from .models import Task, TaskDetail, CrawlQueue
 from .serializers import TaskSerializer, TaskDetailSerializer, TaskListSerializer
 from rest_framework.decorators import action
 from rest_framework.response import Response
 from rest_framework import status
-from .tasks import trigger_task_execution
+from .tasks import trigger_task_execution, process_crawl_queue
 import threading
+import logging
+logger = logging.getLogger(__name__)
+
+print(f'----------chat----------init---------')
+

 # Pagination settings
 class StandardResultsSetPagination(PageNumberPagination):
     page_size = 10
@@ -36,6 +44,7 @@ def sync_stream(generator):
             # Fetch the next item from the async generator
             chunk = loop.run_until_complete(async_gen.__anext__())
             if chunk and chunk.strip():
+                print(chunk)
                 yield chunk
         except StopAsyncIteration:
             break
@@ -52,6 +61,9 @@ async def call_model_stream(messages):
         "model": api_info["model"],
         "messages": messages,
         "max_output_tokens": 1024,
+        "thinking": {
+            "type": "disabled"
+        },
         "stream": True
     }
@@ -77,6 +89,12 @@ class TaskViewSet(viewsets.ModelViewSet):
     ordering_fields = ['created_at', 'updated_at']

     def get_serializer_class(self):
+        print(f'----------get_serializer_class-------------------')
+        print(f'1111111111')
+
         if self.action == 'list':
             return TaskListSerializer  # list returns the simplified fields
         return TaskSerializer  # retrieve returns the full fields, including details
@@ -86,14 +104,26 @@ class TaskViewSet(viewsets.ModelViewSet):
         task = self.get_object()

         try:
-            # Trigger the Celery task asynchronously
-            async_result = trigger_task_execution.delay(task.id)
+            # Mark the task as queued (frontend shows "in progress")
+            task.status = 'running'  # the frontend still reads this as "in progress"
+            task.save(update_fields=['status'])
+
+            # Create the queue record
+            CrawlQueue.objects.create(
+                task=task,
+                texts=task.description,
+                parse_flag=task.parse_flag,
+                limit=task.limit,
+                sort_options=[],
+                status="pending"
+            )
+
+            # Trigger queue processing (async; a single worker runs items serially)
+            process_crawl_queue.delay()

-            # Return immediately; don't touch async_result's payload
             return Response({
                 "success": True,
-                "task_id": async_result.id,
-                "message": f"任务 {task.id} 已触发"
+                "message": f"任务 {task.id} 已加入队列"
             }, status=status.HTTP_200_OK)

         except Exception as e:
@@ -104,12 +134,23 @@ class TaskViewSet(viewsets.ModelViewSet):

     @action(detail=True, methods=['post'])
     def chat(self, request, pk=None):
+        print(f'----------chat-------------------')
+        print(f'222222222222222')
+
         task = self.get_object()
         user_question = request.data.get("question", "")

+        print(f'----chat--------------user_question={user_question}--------------')
+
         if not user_question:
             return Response({"success": False, "message": "question 参数不能为空"}, status=400)

         # Build the structured documents
+        print(f'----chat--------------task={task}--------------')
+
         all_docs = TaskDetail.objects.filter(task=task)
         all_docs_list = []
         for doc in all_docs:
@@ -125,6 +166,9 @@ class TaskViewSet(viewsets.ModelViewSet):
             })
         all_docs_json = json.dumps(all_docs_list, ensure_ascii=False)

+        print(f'----chat--------------all_docs_json={all_docs_json}--------------')
+
         SYSTEM_PROMPT = """
 你是专业文献问答助手。请严格根据提供的任务文档回答用户问题。
 任务文档内容已经结构化提供为 JSON 列表,每条文档包含字段:
@@ -144,6 +188,9 @@ class TaskViewSet(viewsets.ModelViewSet):

         # Return via Django's StreamingHttpResponse
         response = StreamingHttpResponse(sync_stream(call_model_stream(messages)), content_type="text/event-stream")
+
+        print(f'----chat--------------666666666--------------')
+
         return response
 from rest_framework import status
 from rest_framework.response import Response
@@ -156,6 +203,11 @@ class TaskDetailViewSet(viewsets.ModelViewSet):
     search_fields = ['title', 'author', 'site']

     def get_queryset(self):
+        print(f'----------get_queryset-------------------')
+        print(f'33333333333333')
+
         queryset = super().get_queryset()
         task_id = self.request.query_params.get('task')
         if task_id and task_id.isdigit():
Binary file not shown.
entrypoint.sh
@@ -1,10 +1,14 @@
 #!/bin/bash
 # entrypoint.sh

-# Start the Celery worker
-echo "Starting Celery..."
-celery -A selenium_django worker -l info --pool=solo &
+# Start the Celery enqueue worker (may be multi-threaded)
+echo "Starting Celery crawler queue worker..."
+celery -A selenium_django worker -Q crawler -l info --pool=threads -c 4 &

-# Start Django
+# Start the Celery crawl worker (sequential, single process)
+echo "Starting Celery crawl_worker (sequential)..."
+celery -A selenium_django worker -Q crawl_worker -l info --pool=prefork -c 1 &
+
+# Start Django via Gunicorn
 echo "Starting Django..."
 exec gunicorn selenium_django.wsgi:application --log-level=info --bind 0.0.0.0:8000
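The script now starts two workers so enqueueing stays responsive while crawling stays strictly sequential: the crawler queue runs with a thread pool of four, and crawl_worker runs a single prefork process. The -Q flags must agree with the queue= options on the @shared_task decorators in tasks.py; a sketch of the pairing, with hypothetical task names and bodies elided:

    from celery import shared_task

    @shared_task(queue="crawler")        # consumed by: worker -Q crawler --pool=threads -c 4
    def enqueue_job(task_id):
        ...                              # fast: writes a CrawlQueue row and returns

    @shared_task(queue="crawl_worker")   # consumed by: worker -Q crawl_worker --pool=prefork -c 1
    def run_next_job():
        ...                              # slow: performs one crawl at a time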
selenium_django/settings.py
@@ -11,26 +11,26 @@ https://docs.djangoproject.com/en/5.2/ref/settings/
 """

 from pathlib import Path
+import os
 # Build paths inside the project like this: BASE_DIR / 'subdir'.
 BASE_DIR = Path(__file__).resolve().parent.parent

 # Celery configuration
-CELERY_BROKER_URL = 'redis://redis:6379/0'
-CELERY_RESULT_BACKEND = 'redis://redis:6379/0'
+CELERY_BROKER_URL = os.environ.get("CELERY_BROKER_URL", "redis://redis:6379/0")
+CELERY_RESULT_BACKEND = os.environ.get("CELERY_RESULT_BACKEND", "redis://redis:6379/0")
 CELERY_ACCEPT_CONTENT = ['json']
 CELERY_TASK_SERIALIZER = 'json'
 CELERY_RESULT_SERIALIZER = 'json'
 CELERY_TIMEZONE = 'Asia/Shanghai'  # adjust to your local timezone

 # Crawler API address
-CRAWL_API_URL = "http://47.83.141.164:5001/crawl"
+CRAWL_API_URL = os.environ.get("CRAWL_API_URL", "http://selenium:5000/crawl")

 # Model API configuration
 api_info = {
-    "model": "glm-4.5",
-    "base_url": "https://open.bigmodel.cn/api/paas/v4",
-    "api_key": "ce39bdd4fcf34ec0aec75072bc9ff988.hAp7HZTVUwy7vImn"
+    "model": os.environ.get("API_MODEL", "glm-4.5"),  # default value is optional
+    "base_url": os.environ.get("API_BASE_URL", "https://open.bigmodel.cn/api/paas/v4"),
+    "api_key": os.environ.get("API_KEY", ""),
 }

 # Quick-start development settings - unsuitable for production
selenium_vue Dockerfile
@@ -4,9 +4,12 @@ FROM node:18-alpine as builder
 # Set the working directory
 WORKDIR /app

-# Build-time environment variables
-ARG VITE_API_BASE_URL=${VITE_API_BASE_URL:-http://localhost:8000/api}
-ENV VITE_API_BASE_URL=$VITE_API_BASE_URL
+ARG VITE_API_BASE_URL
+ARG VITE_CRAWL_URL
+
+# Expose to Vite at build time
+ENV VITE_API_BASE_URL=${VITE_API_BASE_URL}
+ENV VITE_CRAWL_URL=${VITE_CRAWL_URL}

 # Copy the frontend code
 COPY frontend-vite/package*.json ./
index.html
@@ -13,5 +13,3 @@
     <script type="module" src="/src/main.js"></script>
   </body>
 </html>
-
-
src/config/API_CONFIG.ts
@@ -1,8 +1,15 @@
+// src/config/API_CONFIG.ts
+
 // API config - parameterized via environment variables
 export const API_CONFIG = {
-  // Read the API base URL from env vars, falling back to the default
-  BASE_URL: import.meta.env.VITE_API_BASE_URL || 'http://localhost:8000/api',
-  CRAWL_URL: import.meta.env.VITE_CRAWL_API_URL || 'http://localhost:5000',
+  // Use the host's mapped port, not the Docker service name
+  BASE_URL: import.meta.env.VITE_API_BASE_URL
+    ? import.meta.env.VITE_API_BASE_URL.replace(/\/$/, "") // no longer appends /api
+    : "http://47.83.141.164:8002/api", // default remote server API root
+
+  CRAWL_URL: import.meta.env.VITE_CRAWL_URL
+    ? import.meta.env.VITE_CRAWL_URL.replace(/\/$/, "")
+    : "http://47.83.141.164:5001/crawl", // default remote crawler service

   // Timeout config
   TIMEOUT: {
@@ -12,38 +19,38 @@ export const API_CONFIG = {

   // Request header config
   HEADERS: {
-    'Content-Type': 'application/json',
+    "Content-Type": "application/json",
   },
-}
+};

 // Dynamic settings (modifiable at runtime)
 export const settings = {
   get baseUrl() {
-    return localStorage.getItem('api_base_url') || API_CONFIG.BASE_URL
+    return localStorage.getItem("api_base_url") || API_CONFIG.BASE_URL;
   },
   set baseUrl(v) {
-    localStorage.setItem('api_base_url', (v || '').replace(/\/$/, ''))
+    localStorage.setItem("api_base_url", (v || "").replace(/\/$/, ""));
   },

   get crawlUrl() {
-    return localStorage.getItem('crawl_url') || API_CONFIG.CRAWL_URL
+    return localStorage.getItem("crawl_url") || API_CONFIG.CRAWL_URL;
   },
   set crawlUrl(v) {
-    localStorage.setItem('crawl_url', (v || '').replace(/\/$/, ''))
+    localStorage.setItem("crawl_url", (v || "").replace(/\/$/, ""));
   },
-}
+};

-// Get current environment info
+// Get current environment info (for debugging)
 export const getEnvironmentInfo = () => ({
   NODE_ENV: import.meta.env.NODE_ENV,
   BASE_URL: import.meta.env.BASE_URL,
   API_BASE_URL: import.meta.env.VITE_API_BASE_URL,
-  CRAWL_API_URL: import.meta.env.VITE_CRAWL_API_URL,
+  CRAWL_API_URL: import.meta.env.VITE_CRAWL_URL,
   currentBaseUrl: settings.baseUrl,
   currentCrawlUrl: settings.crawlUrl,
-})
+});

 // Dev-environment debug output
 if (import.meta.env.DEV) {
-  console.log('🔧 API Configuration:', getEnvironmentInfo())
+  console.log("🔧 API Configuration:", getEnvironmentInfo());
 }
tasks list Vue component (filename not shown)
@@ -80,7 +80,7 @@
     <div v-for="(it, idx) in filteredItems" :key="it.id || idx" class="panel" style="min-height:180px; display:flex; flex-direction:column; cursor:pointer;" @click="isDone(it.status) ? $router.push(`/tasks/${it.id}`) : null">
       <div style="display:flex; align-items:center; justify-content:space-between; gap:8px;">
         <div style="font-weight:700; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; max-width:70%;">{{ it.name || '未命名任务' }}</div>
-        <span :style="statusPill(it.status)">{{ statusText(it.status) }}</span>
+        <span :style="statusPill(it.status)">{{ statusText(it.status, it.id) }}</span>
       </div>
       <div style="color:#6b7280; margin-top:4px; font-size:12px;">{{ it.task_id || '(无ID)' }}</div>
       <div style="color:#374151; margin-top:8px; flex:1; overflow:auto; font-size:13px;">{{ it.description || '(无描述)' }}</div>
@@ -481,9 +481,14 @@ function statusPill(s){
   const c = color[s] || '#6b7280'
   return { background:bg, color:c, padding:'4px 10px', borderRadius:'999px', fontWeight:'600', fontSize:'12px' }
 }
-function statusText(s){
+function statusText(s, id){
+  const local = localStatusMap.value[id]
+  if (local === 'running') return '运行中' // ✅ forced running
+  if (local === 'done') return '完成'
+  if (local === 'failed') return '失败'
   const map = {
     running:'运行中',
+    queued:'运行中',
     idle:'空闲中',
     done:'完成',
     failed:'失败',
@@ -601,14 +606,14 @@ async function runTasksConcurrently(tasks) {
   await fetchTasks()
 }

+const localStatusMap = ref({}) // { [taskId]: 'running' | 'done' | null }
 // Run a task manually
 async function runNow(task) {
   // Guard against double clicks
   if (runningIds.value.has(task.id)) return
   runningIds.value.add(task.id)
+  localStatusMap.value[task.id] = 'running' // ✅ force-lock as running

-  // Immediately show running state on the frontend
-  task.status = 'running'
   // Don't call fetchTasks right away, so backend data doesn't overwrite this

   try {
@@ -650,6 +655,7 @@ async function runNow(task) {
         console.log(`任务 ${task.id} 达到终止状态: ${currentTask.status},停止轮询`)
         clearInterval(interval)
         runningIds.value.delete(task.id)
+        localStatusMap.value[task.id] = data.status // ✅ unlock and record the final status
       }
     } else {
       console.error(`轮询中未找到任务 ${task.id}`)