Mirror of https://git.oceanpay.cc/danial/kami_apple_exchage.git
Synced 2025-12-18 22:29:09 +00:00
This commit migrates the backend task queue from Celery to Arq to support coroutine-based task processing. Main changes:
- Update documentation and configuration files to reflect the architecture change.
- Adjust the health checks and service initialization logic to work with Arq.
- Remove the Celery-related code and add the Arq task definitions and scheduler.
- Update the Dockerfile and related scripts so the Arq worker runs correctly.
- Rework task handling in the API and business services, removing the dependency on Celery.

These changes aim to improve the system's asynchronous processing capability and overall performance.
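As a rough illustration of the pattern this commit adopts (not the repository's actual task module — the function name, queue settings, and arguments below are placeholders), an Arq worker pairs plain async functions with a WorkerSettings class, and producers enqueue jobs by name through an ArqRedis pool instead of calling Celery's .delay()/.apply_async():

import asyncio

from arq import create_pool
from arq.connections import RedisSettings


async def process_order(ctx: dict, order_id: str) -> str:
    # Coroutine-based task; ctx carries the worker's shared state (e.g. its Redis pool).
    return f"processed {order_id}"


class WorkerSettings:
    # Entry point for the `arq <module>.WorkerSettings` command; functions listed
    # here become the tasks the worker consumes from Redis.
    functions = [process_order]
    redis_settings = RedisSettings(host="localhost", port=6379)


async def enqueue_example() -> None:
    # Producer side: enqueue a job by task name with its arguments.
    redis = await create_pool(RedisSettings(host="localhost", port=6379))
    await redis.enqueue_job("process_order", "order-123")


if __name__ == "__main__":
    asyncio.run(enqueue_example())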
344 lines
12 KiB
Python
"""
|
||
健康检查服务
|
||
提供各种健康检查功能的业务逻辑
|
||
"""
|
||
|
||
import asyncio
|
||
from datetime import datetime
|
||
from typing import Any
|
||
|
||
from sqlalchemy import text
|
||
|
||
from app.core.database import db_manager
|
||
from app.core.log import get_logger
|
||
from app.core.redis_manager import redis_manager
|
||
from app.core.state_manager import StateType, state_manager, TaskState
|
||
from app.enums.task import OrderTaskStatus
|
||
from app.services.file_service import file_service
|
||
|
||
logger = get_logger(__name__)
|
||
|
||
|
||
class HealthService:
|
||
"""健康检查服务类"""
|
||
|
||
    @staticmethod
    async def get_readiness_status() -> dict[str, Any]:
        """Get readiness status - used by the Kubernetes readiness probe"""
        health_checks = {
            "redis": False,
            "database": False,
            "arq": False,
            "file_storage": False,
        }

        errors = []

        # Check the Redis connection
        try:
            redis_client = await redis_manager.get_redis()
            if redis_client:
                await redis_client.ping()
                health_checks["redis"] = True
            else:
                errors.append("Redis client not initialized")
        except Exception as e:
            errors.append(f"Redis check failed: {str(e)}")

        # Check the database connection
        try:
            async with db_manager.get_async_session() as session:
                await session.execute(text("SELECT 1"))
                health_checks["database"] = True
        except Exception as e:
            errors.append(f"Database check failed: {str(e)}")

        # Check Arq - checking the Redis connection is sufficient
        try:
            # Arq runs on Redis, so a healthy Redis connection means Arq is healthy
            health_checks["arq"] = health_checks["redis"]
            if not health_checks["arq"]:
                errors.append("Arq health check failed (Redis not available)")
        except Exception as e:
            errors.append(f"Arq check failed: {str(e)}")

        # Check file storage
        try:
            if await file_service.check_storage_health():
                health_checks["file_storage"] = True
            else:
                errors.append("File storage check failed")
        except Exception as e:
            errors.append(f"File storage check failed: {str(e)}")

        all_healthy = all(health_checks.values())

        result = {
            "status": "ready" if all_healthy else "not_ready",
            "timestamp": datetime.now().isoformat(),
            "checks": health_checks,
            "healthy": all_healthy,
        }

        if errors:
            result["errors"] = errors

        return result

    @staticmethod
    async def get_startup_status() -> dict[str, Any]:
        """Get startup status - used by the Kubernetes startup probe"""
        startup_checks = {
            "redis_initialized": False,
            "database_migrated": False,
            "arq_ready": False,
            "storage_ready": False,
        }

        errors = []

        # Check whether Redis has been initialized
        try:
            redis_client = await redis_manager.get_redis()
            if redis_client:
                await redis_client.ping()
                startup_checks["redis_initialized"] = True
            else:
                errors.append("Redis not initialized")
        except Exception as e:
            errors.append(f"Redis initialization check failed: {str(e)}")

        # Check whether the database is available
        try:
            async with db_manager.get_async_session() as session:
                await session.execute(text("SELECT 1"))
                startup_checks["database_migrated"] = True
        except Exception as e:
            errors.append(f"Database migration check failed: {str(e)}")

        # Check whether Arq is ready - checking Redis is sufficient
        try:
            startup_checks["arq_ready"] = startup_checks["redis_initialized"]
            if not startup_checks["arq_ready"]:
                errors.append("Arq startup check failed (Redis not ready)")
        except Exception as e:
            errors.append(f"Arq startup check failed: {str(e)}")

        # Check whether storage is ready
        try:
            if await file_service.check_storage_health():
                startup_checks["storage_ready"] = True
            else:
                errors.append("Storage startup check failed")
        except Exception as e:
            errors.append(f"Storage startup check failed: {str(e)}")

        all_started = all(startup_checks.values())

        result = {
            "status": "started" if all_started else "not_started",
            "timestamp": datetime.now().isoformat(),
            "checks": startup_checks,
            "started": all_started,
        }

        if errors:
            result["errors"] = errors

        return result

    @staticmethod
    async def get_liveness_status() -> dict[str, Any]:
        """Get liveness status - used by the Kubernetes liveness probe"""
        return await HealthService.get_readiness_status()

    @staticmethod
    async def get_health_report() -> dict[str, Any]:
        """Get the full health report"""
        health_report = {
            "overall_status": "healthy",
            "timestamp": datetime.now().isoformat(),
            "components": {},
            "issues": [],
        }

        issues = []

        # Redis health check
        try:
            redis_client = await redis_manager.get_redis()
            if redis_client:
                info = await redis_client.info()
                health_report["components"]["redis"] = {
                    "status": "healthy",
                    "version": info.get("redis_version", "unknown"),
                    "used_memory": info.get("used_memory", 0),
                    "connected_clients": info.get("connected_clients", 0),
                }
            else:
                health_report["components"]["redis"] = {
                    "status": "unhealthy",
                    "error": "Redis client not available",
                }
                issues.append("Redis: client not available")
        except Exception as e:
            health_report["components"]["redis"] = {
                "status": "unhealthy",
                "error": str(e),
            }
            issues.append(f"Redis: {str(e)}")

        # Database health check
        try:
            async with db_manager.get_async_session() as session:
                result = await session.execute(text("SELECT version()"))
                version = result.scalar()
                health_report["components"]["database"] = {
                    "status": "healthy",
                    "version": version,
                    "connection_pool": "active",
                }
        except Exception as e:
            health_report["components"]["database"] = {
                "status": "unhealthy",
                "error": str(e),
            }
            issues.append(f"Database: {str(e)}")

        # Arq health check - based on the Redis status
        try:
            redis_status = health_report["components"]["redis"].get("status")
            if redis_status == "healthy":
                health_report["components"]["arq"] = {
                    "status": "healthy",
                    "redis_connected": True,
                    "message": "Arq uses Redis for queue management",
                }
            else:
                health_report["components"]["arq"] = {
                    "status": "unhealthy",
                    "redis_connected": False,
                    "error": "Redis not available for Arq",
                }
                issues.append("Arq: Redis not available")
        except Exception as e:
            health_report["components"]["arq"] = {
                "status": "unhealthy",
                "error": str(e),
            }
            issues.append(f"Arq: {str(e)}")

        # File storage health check
        try:
            storage_health = await file_service.check_storage_health()
            health_report["components"]["file_storage"] = {
                "status": "healthy" if storage_health else "unhealthy",
                "writable": storage_health,
            }
            if not storage_health:
                issues.append("File storage: not writable")
        except Exception as e:
            health_report["components"]["file_storage"] = {
                "status": "unhealthy",
                "error": str(e),
            }
            issues.append(f"File storage: {str(e)}")

        # Set the overall status
        if issues:
            health_report["overall_status"] = (
                "degraded"
                if len(issues) < len(health_report["components"])
                else "unhealthy"
            )
            health_report["issues"] = issues

        return health_report

    @staticmethod
    async def get_system_metrics() -> dict[str, Any]:
        """Get system metrics"""
        try:
            # Get task statistics kept in Redis
            task_stats = await HealthService._get_task_statistics()

            metrics = {
                "timestamp": datetime.now().isoformat(),
                "task_stats": task_stats,
                "system": await HealthService._get_system_info(),
            }

            # Add Redis metrics
            try:
                redis_client = await redis_manager.get_redis()
                if redis_client:
                    info = await redis_client.info()
                    metrics["redis"] = {
                        "used_memory": info.get("used_memory", 0),
                        "connected_clients": info.get("connected_clients", 0),
                        "keyspace_hits": info.get("keyspace_hits", 0),
                        "keyspace_misses": info.get("keyspace_misses", 0),
                    }
            except Exception:
                pass

            return metrics

        except Exception as e:
            logger.error(f"Failed to get system metrics: {e}")
            return {
                "timestamp": datetime.now().isoformat(),
                "error": str(e),
            }

    @staticmethod
    async def _get_task_statistics() -> dict[str, Any]:
        """Get task statistics"""
        try:
            # Get task statistics from the state manager
            task_stats = await state_manager.get_task_statistics()
            return {
                "total_tasks": task_stats.total_tasks,
                "completed_tasks": task_stats.completed_tasks,
                "failed_tasks": task_stats.failed_tasks,
                "pending_tasks": task_stats.pending_tasks,
                "running_tasks": task_stats.running_tasks,
                "success_rate": (
                    task_stats.completed_tasks / task_stats.total_tasks * 100
                    if task_stats.total_tasks > 0
                    else 0
                ),
            }
        except Exception as e:
            logger.error(f"Failed to get task statistics: {e}")
            return {
                "total_tasks": 0,
                "completed_tasks": 0,
                "failed_tasks": 0,
                "pending_tasks": 0,
                "running_tasks": 0,
                "success_rate": 0,
                "error": str(e),
            }

    @staticmethod
    async def _get_system_info() -> dict[str, Any]:
        """Get system information"""
        import os
        import psutil

        try:
            process = psutil.Process(os.getpid())
            return {
                "cpu_percent": process.cpu_percent(),
                "memory_usage": process.memory_info().rss,
                "thread_count": process.num_threads(),
                "open_files": len(process.open_files()),
            }
        except Exception:
            return {
                "cpu_percent": 0,
                "memory_usage": 0,
                "thread_count": 0,
                "open_files": 0,
            }
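For context on how these checks are typically consumed, here is a hedged sketch of Kubernetes probe endpoints wired to HealthService. The route paths, the import path app.services.health_service, and the assumption that the project exposes these via FastAPI are illustrative, not taken from this repository:

from fastapi import APIRouter

from app.services.health_service import HealthService

router = APIRouter(tags=["health"])


@router.get("/healthz/live")
async def liveness() -> dict:
    # Liveness probe: delegates to the readiness logic, as HealthService does internally.
    return await HealthService.get_liveness_status()


@router.get("/healthz/ready")
async def readiness() -> dict:
    # Readiness probe: Redis, database, Arq, and file storage must all pass.
    return await HealthService.get_readiness_status()


@router.get("/healthz/startup")
async def startup() -> dict:
    # Startup probe: checks that initialization (Redis, migrations, storage) has completed.
    return await HealthService.get_startup_status()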