kami_apple_exchage/backend/app/tasks/crawler_tasks.py
danial 8ad2a5366a refactor(backend): replace Celery with Arq for coroutine-based task processing
This commit migrates the backend task queue from Celery to Arq to support coroutine-based task processing. The main changes are:
- Update documentation and configuration files to reflect the architectural change.
- Adjust health checks and service initialization logic for Arq.
- Remove Celery-related code and add the Arq task definitions and scheduler.
- Update the Dockerfile and related scripts so the Arq worker runs correctly.
- Adjust task handling in the API and business services to remove the Celery dependency.

These changes aim to improve the system's asynchronous processing capability and overall performance (an illustrative Arq sketch follows below).
2025-09-18 16:02:05 +08:00
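
The new Arq module itself is not part of this page. As a rough illustration of the architecture described above, an Arq coroutine task plus worker definition typically looks like the sketch below; the module name, Redis settings, and timeout values are assumptions, not repository contents:

# sketch_arq_worker.py - illustrative only; names and settings are assumptions
from arq.connections import RedisSettings, create_pool

REDIS = RedisSettings(host="localhost", port=6379)

async def process_apple_order(ctx: dict, order_id: str) -> dict:
    # Arq passes a context dict as the first argument; the real implementation
    # presumably lives in app/tasks/crawler_tasks_arq.py
    return {"success": True, "order_id": order_id}

async def enqueue_example(order_id: str) -> None:
    # Producers enqueue jobs through a Redis pool instead of Celery's .delay()
    pool = await create_pool(REDIS)
    await pool.enqueue_job("process_apple_order", order_id)

class WorkerSettings:
    # The worker is started with the arq CLI: arq sketch_arq_worker.WorkerSettings
    functions = [process_apple_order]
    redis_settings = REDIS
    job_timeout = 1800  # matches the 30-minute lock timeout used in the old Celery task

The worker process is launched via the arq CLI (e.g. arq <module>.WorkerSettings), which is presumably what the updated Dockerfile and scripts do.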



"""
DEPRECATED: This file has been replaced with Arq for coroutine-based task processing.
Please use app/tasks/crawler_tasks_arq.py instead.
"""
raise ImportError("Celery has been replaced with Arq. Please update your imports to use app.tasks.crawler_tasks_arq")
import asyncio
import traceback
from datetime import datetime
from typing import Any

from billiard.exceptions import WorkerLostError
from celery import current_task
from celery.exceptions import Retry

from app.core.celery_app import celery_app  # assumed import path for the project's Celery app
from app.core.database import db_manager
from app.core.distributed_lock import get_lock
from app.core.log import get_logger
from app.core.redis_manager import redis_manager
from app.core.state_manager import task_state_manager
from app.enums.task import OrderTaskStatus
from app.models.orders import OrderStatus
from app.repositories.order_repository import OrderRepository
from app.services.link_service import LinksService
from app.services.playwright_service import AppleOrderProcessor
from app.services.user_data_service import UserDataService
from app.tasks.utils import execute_async_task

logger = get_logger(__name__)
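
# NOTE: execute_async_task (app.tasks.utils) is not shown here. It is assumed to bridge
# Celery's synchronous workers and the coroutines below, roughly equivalent to
# asyncio.run(coro) when no event loop is running, or
# asyncio.run_coroutine_threadsafe(coro, loop).result() when one already exists.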


@celery_app.task(bind=True, name="app.tasks.crawler_tasks.process_apple_order")
def process_apple_order(self, order_id: str, count: int = 3) -> dict[str, Any] | None:
    """
    Process an Apple order.

    Supports distributed locking and progress tracking.

    Args:
        order_id: Order ID.
        count: Number of retries remaining.

    Returns:
        dict[str, Any]: Processing result.
    """
    task_id = getattr(current_task.request, "id", "unknown")
    logger.info(f"Start processing Apple order: task_id={task_id}, order_id={order_id}")
    try:
        # Run the async task on the existing event loop
        result = execute_async_task(_process_apple_order_async(self, order_id, task_id))
        return result
    except Exception:
        logger.error(f"Failed to run async task: {traceback.format_exc()}")
        if count <= 0:
            logger.error(f"Apple order processing failed and retries are exhausted: order_id={order_id}")
            return None
        # Re-enqueue the order with one fewer retry remaining
        process_apple_order.delay(order_id, count=count - 1)
        return None


async def _process_apple_order_async(
    task_instance, order_id: str, task_id: str
) -> dict[str, Any]:
    """Process an Apple order asynchronously."""
    # Check whether task processing is paused
    if await redis_manager.is_task_paused():
        _, reason = await redis_manager.get_task_pause_state()
        logger.info(f"Tasks are paused, skipping order: {order_id}, reason: {reason}")
        await asyncio.sleep(10)  # wait a while to avoid polling too frequently
        process_apple_order.retry(countdown=60, max_retries=3)  # Task.retry() raises celery.exceptions.Retry; retried in 1 minute
        return {
            "success": False,
            "is_paused": True,
            "pause_reason": reason,
            "message": "Task processing is paused",
            "order_id": order_id,
        }

    # Acquire the distributed lock
    lock_key = f"apple_order_processing:{order_id}"
    lock = get_lock(
        key=lock_key,
        timeout=1800,  # 30-minute timeout
        retry_times=5,
        retry_delay=1.0,
        auto_extend=True,
        extend_interval=120,  # extend every 2 minutes
    )
    try:
        # Try to acquire the lock
        if not await lock.acquire():
            logger.warning(f"Could not acquire order lock: {order_id}")
            raise Retry(f"Apple order {order_id} is being processed by another worker")
        logger.info(f"Acquired Apple order lock: {order_id}")

        # Set the initial task state
        await task_state_manager.set_task_state(
            task_id=task_id,
            status=OrderTaskStatus.RUNNING,
            worker_id=order_id,
            progress=0.0,
            started_at=datetime.now().timestamp(),
        )

        # Create the Apple order processor
        processor = AppleOrderProcessor(task_id, order_id)
        # Run the order processing
        result = await processor.process_order()

        # Update the final state
        db_status = OrderStatus.SUCCESS
        if result.get("success"):
            logger.info(f"Apple order processed successfully: {order_id}")
            await task_state_manager.complete_task(task_id)
        else:
            db_status = OrderStatus.FAILURE
            await task_state_manager.fail_task(task_id, result.get("error", "unknown error"))
            logger.error(f"Apple order processing failed: {order_id}, error: {result.get('error')}")

        async with db_manager.get_async_session() as session:
            order_repo = OrderRepository(session)
            await order_repo.update_by_id(
                order_id,
                status=db_status,
                failure_reason=result.get("error"),
                completed_at=datetime.now(),
            )
        return {}
    except Exception as e:
        logger.error(f"Exception while processing Apple order: {order_id}, error: {traceback.format_exc()}")
        # Mark the task as failed
        await task_state_manager.fail_task(task_id, str(e))
        # Mark the order as failed
        try:
            async with db_manager.get_async_session() as session:
                order_repo = OrderRepository(session)
                await order_repo.update_by_id(
                    order_id,
                    status=OrderStatus.FAILURE,
                    failure_reason=str(e),
                    completed_at=datetime.now(),
                )
        except Exception as db_error:
            logger.error(f"Failed to update order status: {db_error}")
        # Raise a Celery retry for retryable exceptions
        if isinstance(e, (ConnectionError, TimeoutError, WorkerLostError)):
            raise task_instance.retry(exc=e, countdown=120, max_retries=2)
        return {"success": False, "error": str(e), "order_id": order_id}
    finally:
        # Release the lock
        await lock.release()
        logger.info(f"Released Apple order lock: {order_id}")
@celery_app.task(name="app.tasks.crawler_tasks.batch_process_orders")
def batch_process_orders():
"""
批量处理订单任务
Args:
Returns:
dict[str, Any]: 批量处理结果
"""
# 检查是否已有事件循环在运行,使用正确的事件循环执行异步任务
try:
# 使用现有的事件循环运行异步任务
execute_async_task(_batch_process_orders_async())
except Exception as e:
logger.error(f"运行批量处理异步任务失败: {e}")
raise


async def _batch_process_orders_async():
    """Batch-process orders asynchronously."""
    # Skip the run entirely if task processing is paused
    if await redis_manager.is_task_paused():
        is_paused, reason = await redis_manager.get_task_pause_state()
        logger.info(f"Tasks are paused, skipping batch processing, reason: {reason}")
        return None

    # Open a database session
    async with db_manager.get_async_session() as session:
        from app.services.order_business_service import OrderService

        order_service = OrderService(session)
        user_service = UserDataService(session)
        links_service = LinksService(session)

        # Keep creating orders until user data or links run out
        while True:
            user_data_id = await redis_manager.get_user_data_id()
            if not user_data_id:
                return None

            # Make sure the user data exists
            user_info = await user_service.get_user_info(user_data_id)
            if not user_info:
                return None

            # Make sure a link is available from the pool
            link_info = await links_service.get_next_link_from_pool()
            if not link_info:
                return None

            try:
                # Create the order and enqueue it for processing
                order_id = await order_service.create_order(
                    user_data_id, link_info.link.id
                )
                process_apple_order.delay(order_id)
            except Exception as e:
                logger.error(f"Failed to create order: {e}")
# @celery_app.task(name="app.tasks.crawler_tasks.recover_stalled_orders")
# def recover_stalled_orders() -> dict[str, Any]:
# """
# 恢复停滞的订单任务
# 检查长时间处理中的订单并重新提交
# """
# loop = asyncio.new_event_loop()
# asyncio.set_event_loop(loop)
# try:
# return loop.run_until_complete(_recover_stalled_orders_async())
# finally:
# loop.close()
# async def _recover_stalled_orders_async() -> dict[str, Any]:
# """异步恢复停滞的订单任务"""
# logger.info("开始恢复停滞的订单任务")
# try:
# # 查找处理中但长时间未完成的订单
# timeout_threshold = datetime.now() - timedelta(minutes=45)
# async with db_manager.get_async_session() as session:
# order_repo = OrderRepository(session)
# from sqlalchemy import select, and_
# from app.models.orders import Orders
# result = await session.execute(
# select(Orders).where(
# and_(
# Orders.status == OrderResultStatus.PROCESSING,
# Orders.updated_at < timeout_threshold,
# )
# )
# )
# stalled_orders = result.scalars().all()
# recovered_count = 0
# failed_count = 0
# for order in stalled_orders:
# try:
# # 重新提交任务
# task = process_apple_order.delay(str(order.id))
# logger.info(
# f"重新提交停滞订单任务: order_id={order.id}, task_id={task.id}"
# )
# recovered_count += 1
# except Exception as e:
# logger.error(f"恢复停滞订单任务失败: order_id={order.id}, error={e}")
# failed_count += 1
# return {
# "success": True,
# "recovered_count": recovered_count,
# "failed_count": failed_count,
# "total_stalled_orders": len(stalled_orders),
# "recovered_at": datetime.now().isoformat(),
# }
# except Exception as e:
# logger.error(f"恢复停滞任务异常: {e}")
# return {"success": False, "error": str(e)}
# @celery_app.task(name="app.tasks.crawler_tasks.cleanup_completed_orders")
# def cleanup_completed_orders() -> dict[str, Any]:
# """
# 清理已完成的订单任务状态
# 删除Redis中的过期任务状态
# """
# loop = asyncio.new_event_loop()
# asyncio.set_event_loop(loop)
# try:
# return loop.run_until_complete(_cleanup_completed_orders_async())
# finally:
# loop.close()
# async def _cleanup_completed_orders_async() -> dict[str, Any]:
# """异步清理已完成的订单任务状态"""
# logger.info("开始清理已完成的订单任务状态")
# try:
# # 清理超过24小时的已完成任务状态
# from app.core.state_manager import StateType
# await task_state_manager.state_manager.cleanup_expired_states(StateType.TASK)
# return {
# "success": True,
# "cleaned_at": datetime.now().isoformat(),
# "message": "已清理过期的任务状态",
# }
# except Exception as e:
# logger.error(f"清理任务状态异常: {e}")
# return {"success": False, "error": str(e)}