Files
kami_spider_monorepo/core/clients/base.py
danial aebc83edc9 feat(clients): 添加第三方 API 客户端基础库及示例实现
- 新增 BaseAPIClient 抽象基类,提供连接池管理、自动重试、超时控制、日志记录和链路追踪功能
- 实现基于 httpx 的 HTTPClient,支持异步请求、JSON 和表单数据、连接池优化
- 提供示例客户端 ExampleAPIClient,展示如何继承自定义第三方服务客户端
- 编写详细的第三方 API 客户端使用指南文档,包含模块划分、核心组件、快速开始及最佳实践
- 集成 OpenTelemetry 追踪,实现请求全链路追踪和错误记录
- 支持 FastAPI 依赖注入和应用生命周期管理客户端实例
- 完善自动重试策略,包含指数退避和可重试异常分类
- 实现敏感请求头自动脱敏,防止日志泄露敏感数据
- 增加客户端健康检查接口,验证服务可用性
- 编写完整单元测试,覆盖客户端初始化、请求发送、重试逻辑及上下文管理器用法
2025-11-01 15:00:18 +08:00

253 lines
7.3 KiB
Python

"""
Base API client with common functionality.
Provides:
- Connection pooling
- Automatic retry with exponential backoff
- Timeout control
- Request/response logging
- OpenTelemetry tracing integration
- Error handling
"""
import asyncio
from typing import Any, Optional, Callable
from abc import ABC, abstractmethod
import httpx
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode
from observability.logging import get_logger
# Module-level logger obtained from the project's observability helper,
# named after this module.
logger = get_logger(__name__)
# OpenTelemetry tracer named after this module. BaseAPIClient below does not
# reference it directly; presumably it is used by concrete clients — confirm.
tracer = trace.get_tracer(__name__)
class BaseAPIClient(ABC):
    """
    Abstract base class for all API clients.

    Bundles the pieces every concrete client needs: retry with exponential
    backoff, request/response logging with header redaction, span-attribute
    construction, async context-manager support and a simple health check.
    Subclasses supply the actual ``httpx.AsyncClient`` via ``_get_client``.
    """

    def __init__(
        self,
        base_url: str,
        timeout: float = 30.0,
        max_retries: int = 3,
        retry_delay: float = 1.0,
        retry_backoff: float = 2.0,
        trace_enabled: bool = True,
    ) -> None:
        """
        Initialize base API client.

        Args:
            base_url: Base URL for the API; trailing slashes are stripped.
            timeout: Request timeout in seconds.
            max_retries: Maximum number of attempts made by
                ``_retry_with_backoff`` (values below 1 are treated as 1).
            retry_delay: Delay before the first retry, in seconds.
            retry_backoff: Multiplier applied to the delay after each retry.
            trace_enabled: Enable OpenTelemetry tracing. Stored on the
                instance; this base class does not read it itself.
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self.retry_backoff = retry_backoff
        self.trace_enabled = trace_enabled
        # Created lazily by the subclass; released again in close().
        self._client: Optional[httpx.AsyncClient] = None

    @abstractmethod
    async def _get_client(self) -> httpx.AsyncClient:
        """
        Get or create HTTP client instance.

        Subclasses must implement this to provide their own client
        configuration.

        Returns:
            httpx.AsyncClient: Configured async HTTP client
        """

    async def close(self) -> None:
        """Close the HTTP client, if one exists, and drop the reference."""
        if self._client is None:
            return
        await self._client.aclose()
        self._client = None
        logger.info(f"{self.__class__.__name__} client closed")

    async def __aenter__(self) -> "BaseAPIClient":
        """Async context manager entry; returns the client itself."""
        return self

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Async context manager exit; always closes the client."""
        await self.close()

    async def _retry_with_backoff(
        self,
        func: Callable,
        *args: Any,
        **kwargs: Any
    ) -> Any:
        """
        Execute an async function with retry and exponential backoff.

        Only ``httpx.TimeoutException``, ``httpx.NetworkError`` and
        ``httpx.RemoteProtocolError`` are retried; any other exception is
        logged and re-raised immediately.

        Args:
            func: Async function to execute
            *args: Positional arguments for the function
            **kwargs: Keyword arguments for the function

        Returns:
            Result from the function

        Raises:
            Exception: The last retryable exception once all attempts are
                exhausted, or the first non-retryable exception.
        """
        # Always make at least one attempt: with max_retries <= 0 an
        # unclamped loop would never call func and silently return None.
        attempts = max(1, self.max_retries)
        delay = self.retry_delay

        for attempt in range(1, attempts + 1):
            try:
                return await func(*args, **kwargs)
            except (httpx.TimeoutException, httpx.NetworkError, httpx.RemoteProtocolError) as e:
                if attempt >= attempts:
                    logger.error(
                        f"Request failed after {attempts} attempts: {str(e)}"
                    )
                    # Bare raise keeps the original traceback intact.
                    raise
                logger.warning(
                    f"Request failed (attempt {attempt}/{attempts}): {str(e)}. "
                    f"Retrying in {delay}s..."
                )
                await asyncio.sleep(delay)
                delay *= self.retry_backoff
            except Exception as e:
                # For non-retryable errors, raise immediately
                logger.error(f"Non-retryable error: {str(e)}")
                raise

    def _create_span_attributes(
        self,
        method: str,
        url: str,
        **kwargs: Any
    ) -> dict[str, Any]:
        """
        Create OpenTelemetry span attributes.

        Args:
            method: HTTP method
            url: Request URL
            **kwargs: Additional attributes; these override the defaults
                when a key collides.

        Returns:
            Dictionary of span attributes
        """
        return {
            "http.method": method,
            "http.url": url,
            "http.client": self.__class__.__name__,
            **kwargs,
        }

    def _log_request(
        self,
        method: str,
        url: str,
        headers: Optional[dict[str, str]] = None,
        **kwargs: Any
    ) -> None:
        """
        Log outgoing request at debug level.

        Args:
            method: HTTP method
            url: Request URL
            headers: Request headers; sensitive values are redacted before
                they reach the log record.
            **kwargs: Additional log context
        """
        context = {
            "method": method,
            "url": url,
            "headers": self._sanitize_headers(headers),
            **kwargs,
        }
        logger.debug(f"API Request: {method} {url}", extra=context)

    def _log_response(
        self,
        method: str,
        url: str,
        status_code: int,
        elapsed: float,
        **kwargs: Any
    ) -> None:
        """
        Log API response.

        Status codes in [200, 400) are logged at info level, everything
        else at warning level.

        Args:
            method: HTTP method
            url: Request URL
            status_code: Response status code
            elapsed: Request duration in seconds
            **kwargs: Additional log context
        """
        log_func = logger.info if 200 <= status_code < 400 else logger.warning
        context = {
            "method": method,
            "url": url,
            "status_code": status_code,
            "elapsed_seconds": elapsed,
            **kwargs,
        }
        log_func(
            f"API Response: {method} {url} - {status_code} ({elapsed:.3f}s)",
            extra=context,
        )

    @staticmethod
    def _sanitize_headers(headers: Optional[dict[str, str]]) -> dict[str, str]:
        """
        Sanitize headers for logging (remove sensitive data).

        Header names are compared case-insensitively; matching values are
        replaced with a fixed placeholder and all other headers are passed
        through unchanged.

        Args:
            headers: Request headers, or None

        Returns:
            Sanitized copy of the headers (empty dict for None/empty input)
        """
        if not headers:
            return {}

        # proxy-authorization carries credentials just like authorization.
        sensitive_keys = frozenset({
            "authorization",
            "proxy-authorization",
            "api-key",
            "x-api-key",
            "cookie",
            "set-cookie",
        })
        return {
            name: "***REDACTED***" if name.lower() in sensitive_keys else value
            for name, value in headers.items()
        }

    async def health_check(self) -> bool:
        """
        Check if the API client can connect to the service.

        Issues ``GET {base_url}/health`` with a 5 second timeout. Any
        exception is logged and reported as unhealthy rather than raised.

        Returns:
            bool: True if the endpoint answered with HTTP 200, False otherwise
        """
        try:
            client = await self._get_client()
            response = await client.get(f"{self.base_url}/health", timeout=5.0)
            return response.status_code == 200
        except Exception as e:
            # Deliberate best-effort: a health probe must never raise.
            logger.error(f"{self.__class__.__name__} health check failed: {str(e)}")
            return False