Files
kami_apple_exchage/backend/app/services/health_service.py
danial 8ad2a5366a refactor(backend): 将Celery替换为Arq进行协程任务处理
本次提交将后端的任务队列系统从Celery迁移到了Arq,以支持基于协程的任务处理。主要改动包括:
- 更新文档和配置文件,反映架构变化。
- 修改健康检查和服务初始化逻辑,以适应Arq的使用。
- 移除与Celery相关的代码,并添加Arq任务定义和调度器。
- 更新Dockerfile和相关脚本,确保Arq worker能够正确运行。
- 调整API和业务服务中的任务处理逻辑,移除对Celery的依赖。

这些改动旨在提高系统的异步处理能力和整体性能。
2025-09-18 16:02:05 +08:00

344 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
健康检查服务
提供各种健康检查功能的业务逻辑
"""
import asyncio
from datetime import datetime
from typing import Any
from sqlalchemy import text
from app.core.database import db_manager
from app.core.log import get_logger
from app.core.redis_manager import redis_manager
from app.core.state_manager import StateType, state_manager, TaskState
from app.enums.task import OrderTaskStatus
from app.services.file_service import file_service
# Module-level logger, obtained from the project's get_logger helper using this module's name.
logger = get_logger(__name__)
class HealthService:
    """Health-check business logic behind the probe and monitoring endpoints.

    Every public method is a static coroutine returning a plain ``dict``.
    The Arq task queue is never contacted directly: its status is derived
    from the Redis check, because Arq uses Redis for queue management.
    """

    # Cached psutil process handle, reused across calls to
    # ``_get_system_info`` so that CPU usage can be measured between calls.
    _process: Any = None

    @staticmethod
    async def get_readiness_status() -> dict[str, Any]:
        """Return the readiness payload (for the Kubernetes readiness probe).

        Checks Redis (PING), the database (``SELECT 1``), Arq (mirrors the
        Redis result) and file storage, in that order.

        Returns:
            A dict with ``status`` (``"ready"`` or ``"not_ready"``), an ISO
            ``timestamp``, the per-dependency ``checks`` booleans, the
            aggregate ``healthy`` flag and, only when at least one check
            failed, an ``errors`` list of messages.
        """
        health_checks = {
            "redis": False,
            "database": False,
            "arq": False,
            "file_storage": False,
        }
        errors: list[str] = []

        # Redis: a client must be available and answer PING.
        try:
            redis_client = await redis_manager.get_redis()
            if redis_client:
                await redis_client.ping()
                health_checks["redis"] = True
            else:
                errors.append("Redis client not initialized")
        except Exception as exc:
            errors.append(f"Redis check failed: {exc}")

        # Database: a trivial query must succeed on a fresh session.
        try:
            async with db_manager.get_async_session() as session:
                await session.execute(text("SELECT 1"))
                health_checks["database"] = True
        except Exception as exc:
            errors.append(f"Database check failed: {exc}")

        # Arq: it uses Redis, so a healthy Redis is treated as a healthy Arq.
        health_checks["arq"] = health_checks["redis"]
        if not health_checks["arq"]:
            errors.append("Arq health check failed (Redis not available)")

        # File storage.
        try:
            if await file_service.check_storage_health():
                health_checks["file_storage"] = True
            else:
                errors.append("File storage check failed")
        except Exception as exc:
            errors.append(f"File storage check failed: {exc}")

        all_healthy = all(health_checks.values())
        result: dict[str, Any] = {
            "status": "ready" if all_healthy else "not_ready",
            "timestamp": datetime.now().isoformat(),
            "checks": health_checks,
            "healthy": all_healthy,
        }
        if errors:
            result["errors"] = errors
        return result

    @staticmethod
    async def get_startup_status() -> dict[str, Any]:
        """Return the startup payload (for the Kubernetes startup probe).

        Runs the same four checks as the readiness status but reports them
        under startup-specific keys and messages.

        Returns:
            A dict with ``status`` (``"started"`` or ``"not_started"``), an
            ISO ``timestamp``, the ``checks`` booleans, the aggregate
            ``started`` flag and, only when at least one check failed, an
            ``errors`` list of messages.
        """
        startup_checks = {
            "redis_initialized": False,
            "database_migrated": False,
            "arq_ready": False,
            "storage_ready": False,
        }
        errors: list[str] = []

        # Redis: a client must be available and answer PING.
        try:
            redis_client = await redis_manager.get_redis()
            if redis_client:
                await redis_client.ping()
                startup_checks["redis_initialized"] = True
            else:
                errors.append("Redis not initialized")
        except Exception as exc:
            errors.append(f"Redis initialization check failed: {exc}")

        # Database: despite the key name, this only verifies that a trivial
        # query runs; it does not inspect the migration state.
        try:
            async with db_manager.get_async_session() as session:
                await session.execute(text("SELECT 1"))
                startup_checks["database_migrated"] = True
        except Exception as exc:
            errors.append(f"Database migration check failed: {exc}")

        # Arq: ready as soon as Redis is.
        startup_checks["arq_ready"] = startup_checks["redis_initialized"]
        if not startup_checks["arq_ready"]:
            errors.append("Arq startup check failed (Redis not ready)")

        # File storage.
        try:
            if await file_service.check_storage_health():
                startup_checks["storage_ready"] = True
            else:
                errors.append("Storage startup check failed")
        except Exception as exc:
            errors.append(f"Storage startup check failed: {exc}")

        all_started = all(startup_checks.values())
        result: dict[str, Any] = {
            "status": "started" if all_started else "not_started",
            "timestamp": datetime.now().isoformat(),
            "checks": startup_checks,
            "started": all_started,
        }
        if errors:
            result["errors"] = errors
        return result

    @staticmethod
    async def get_liveness_status() -> dict[str, Any]:
        """Return the liveness payload (for the Kubernetes liveness probe).

        Currently this is exactly the readiness payload, including its
        ``"ready"`` / ``"not_ready"`` status values.
        """
        # NOTE(review): liveness therefore fails whenever Redis, the database
        # or file storage is down. If the orchestrator restarts the pod on
        # liveness failure, an external outage would cause restarts — confirm
        # this coupling is intended.
        return await HealthService.get_readiness_status()

    @staticmethod
    async def get_health_report() -> dict[str, Any]:
        """Build the detailed health report for all components.

        Returns:
            A dict with ``overall_status`` (``"healthy"``, ``"degraded"`` or
            ``"unhealthy"``), an ISO ``timestamp``, a ``components`` mapping
            (``redis``, ``database``, ``arq``, ``file_storage``) and the list
            of ``issues`` found.
        """
        health_report: dict[str, Any] = {
            "overall_status": "healthy",
            "timestamp": datetime.now().isoformat(),
            "components": {},
            "issues": [],
        }
        components: dict[str, Any] = health_report["components"]
        issues: list[str] = []

        # Redis: report a few fields from INFO when the server is reachable.
        try:
            redis_client = await redis_manager.get_redis()
            if redis_client:
                info = await redis_client.info()
                components["redis"] = {
                    "status": "healthy",
                    "version": info.get("redis_version", "unknown"),
                    "used_memory": info.get("used_memory", 0),
                    "connected_clients": info.get("connected_clients", 0),
                }
            else:
                components["redis"] = {
                    "status": "unhealthy",
                    "error": "Redis client not available",
                }
                issues.append("Redis: client not available")
        except Exception as exc:
            components["redis"] = {
                "status": "unhealthy",
                "error": str(exc),
            }
            issues.append(f"Redis: {exc}")

        # Database: report the server version string.
        try:
            async with db_manager.get_async_session() as session:
                result = await session.execute(text("SELECT version()"))
                version = result.scalar()
                components["database"] = {
                    "status": "healthy",
                    "version": version,
                    "connection_pool": "active",
                }
        except Exception as exc:
            components["database"] = {
                "status": "unhealthy",
                "error": str(exc),
            }
            issues.append(f"Database: {exc}")

        # Arq: derived from the Redis component, which every branch above
        # has populated by this point.
        if components["redis"].get("status") == "healthy":
            components["arq"] = {
                "status": "healthy",
                "redis_connected": True,
                "message": "Arq uses Redis for queue management",
            }
        else:
            components["arq"] = {
                "status": "unhealthy",
                "redis_connected": False,
                "error": "Redis not available for Arq",
            }
            issues.append("Arq: Redis not available")

        # File storage.
        try:
            storage_health = await file_service.check_storage_health()
            components["file_storage"] = {
                "status": "healthy" if storage_health else "unhealthy",
                "writable": storage_health,
            }
            if not storage_health:
                issues.append("File storage: not writable")
        except Exception as exc:
            components["file_storage"] = {
                "status": "unhealthy",
                "error": str(exc),
            }
            issues.append(f"File storage: {exc}")

        # Overall status: "degraded" while there are fewer issues than
        # components, "unhealthy" once every component has reported one.
        if issues:
            health_report["overall_status"] = (
                "degraded" if len(issues) < len(components) else "unhealthy"
            )
            health_report["issues"] = issues
        return health_report

    @staticmethod
    async def get_system_metrics() -> dict[str, Any]:
        """Collect task statistics, process information and Redis counters.

        Returns:
            A dict with ``timestamp``, ``task_stats`` and ``system``, plus a
            ``redis`` section when Redis INFO could be read. If collection
            fails as a whole, a dict with only ``timestamp`` and ``error``.
        """
        try:
            task_stats = await HealthService._get_task_statistics()
            metrics: dict[str, Any] = {
                "timestamp": datetime.now().isoformat(),
                "task_stats": task_stats,
                "system": await HealthService._get_system_info(),
            }

            # Redis counters are best-effort: on failure the section is
            # omitted, but the reason is logged instead of being discarded.
            try:
                redis_client = await redis_manager.get_redis()
                if redis_client:
                    info = await redis_client.info()
                    metrics["redis"] = {
                        "used_memory": info.get("used_memory", 0),
                        "connected_clients": info.get("connected_clients", 0),
                        "keyspace_hits": info.get("keyspace_hits", 0),
                        "keyspace_misses": info.get("keyspace_misses", 0),
                    }
            except Exception as exc:
                logger.warning(f"获取Redis指标失败: {exc}")

            return metrics
        except Exception as e:
            logger.error(f"获取系统指标失败: {e}")
            return {
                "timestamp": datetime.now().isoformat(),
                "error": str(e),
            }

    @staticmethod
    async def _get_task_statistics() -> dict[str, Any]:
        """Return task counters from the state manager plus a success rate.

        ``success_rate`` is the percentage of completed tasks over total
        tasks, or 0 when there are no tasks. On failure every counter is 0
        and an ``error`` message is included.
        """
        try:
            task_stats = await state_manager.get_task_statistics()
            total = task_stats.total_tasks
            completed = task_stats.completed_tasks
            return {
                "total_tasks": total,
                "completed_tasks": completed,
                "failed_tasks": task_stats.failed_tasks,
                "pending_tasks": task_stats.pending_tasks,
                "running_tasks": task_stats.running_tasks,
                # Guard against division by zero when nothing has run yet.
                "success_rate": completed / total * 100 if total > 0 else 0,
            }
        except Exception as e:
            logger.error(f"获取任务统计失败: {e}")
            return {
                "total_tasks": 0,
                "completed_tasks": 0,
                "failed_tasks": 0,
                "pending_tasks": 0,
                "running_tasks": 0,
                "success_rate": 0,
                "error": str(e),
            }

    @staticmethod
    async def _get_system_info() -> dict[str, Any]:
        """Return CPU, memory, thread and open-file figures for this process.

        The psutil process handle is cached on the class. ``cpu_percent()``
        without an interval measures usage since the previous call on the
        same handle, so a fresh handle per call would always report 0.0.
        The first call on a new handle still returns 0.0; later calls report
        usage since the preceding call.

        Returns:
            A dict with ``cpu_percent``, ``memory_usage`` (RSS in bytes),
            ``thread_count`` and ``open_files``; all zeros if psutil fails.
        """
        import os

        import psutil

        try:
            pid = os.getpid()
            process = HealthService._process
            # Rebuild the handle if there is none yet or the PID changed
            # (e.g. the class was initialised before a fork).
            if process is None or process.pid != pid:
                process = psutil.Process(pid)
                HealthService._process = process
            return {
                "cpu_percent": process.cpu_percent(),
                "memory_usage": process.memory_info().rss,
                "thread_count": process.num_threads(),
                "open_files": len(process.open_files()),
            }
        except Exception as exc:
            # Keep the zeroed fallback, but record why the figures are missing.
            logger.warning(f"获取系统信息失败: {exc}")
            return {
                "cpu_percent": 0,
                "memory_usage": 0,
                "thread_count": 0,
                "open_files": 0,
            }