Files
kami_spider_monorepo/apps/shared/proxy_pool/proxy_pool.py
danial 6c768b6e7b feat(jd): 添加京东相关路由及苹果权益充值功能
- 新增jd模块基础路由,整合app_store和payment子路由
- 实现苹果权益充值接口,支持苹果、携程及沃尔玛多个渠道
- 实现卡号密码查询接口,支持不同类别订单查询
- 新增短信认证相关接口,实现短信验证码发送及短信登录
- 新增商品管理接口,支持SKU详情查询及账号类下单功能
- 新增订单管理接口,实现订单删除功能
- 实现支付相关接口,增加刷新支付参数功能
- 定义完整请求及响应数据模型,确保接口数据规范
- 编写AppStoreSpider类,封装苹果应用内订单处理逻辑
- 引入多种代理池及请求重试机制,增强接口稳定性
- 添加详细日志记录,便于请求追踪与错误排查
2025-11-03 19:35:39 +08:00

443 lines
17 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from collections import Counter
import threading
import time
from abc import ABC, abstractmethod
from typing import Optional, Dict, Tuple
from curl_cffi import ProxySpec, requests
from core.config import ProxyPoolType, settings
from observability.logging import get_logger_with_trace
# from app.config.config import Config
# from app.logging.logger import get_logger
# from app.proxy_pool.enums import ProxyPoolType
# 禁用 HTTPS 请求的警告
logger = get_logger_with_trace(__name__)
class BaseProxyPool(ABC):
"""代理池抽象基类"""
def __init__(self):
self.proxy_timeout = 3 # 代理超时时间,单位秒
self.test_url = "https://www.baidu.com"
self.max_retries = 1 # 最大重试次数
self.lock = threading.Lock() # 添加线程锁以确保并发安全
@abstractmethod
def get_proxy(self, *args, **kwargs) -> Optional[str]:
"""获取代理的抽象方法"""
pass
@abstractmethod
def release_proxy(self, *args, **kwargs):
"""释放代理的抽象方法"""
pass
@abstractmethod
def _get_new_proxy(self) -> Optional[str]:
"""获取新的代理的抽象方法"""
pass
@abstractmethod
def mark_proxy_invalid(self, proxy: str):
"""标记代理为无效的抽象方法"""
pass
@abstractmethod
def remove_invalid_proxy(self, proxy: str):
"""删除指定的无效代理的抽象方法"""
pass
def _validate_proxy_with_auth(self, proxy: str) -> bool:
"""验证带认证信息的代理是否可用
Args:
proxy: 代理地址可能是纯IP地址或带认证信息的完整代理地址
Returns:
bool: 代理是否可用
"""
# 检查是否已经是带认证信息的代理地址
if proxy.startswith("http://") and "@" in proxy:
proxyMeta = ProxySpec(all=proxy)
else:
# 如果是纯IP地址添加认证信息
proxyMeta: ProxySpec = ProxySpec(all="http://%(user)s:%(pass)s@%(host)s" % {
"host": proxy,
"user": settings.proxy_username,
"pass": settings.proxy_password,
})
for attempt in range(self.max_retries):
try:
response = requests.get(
self.test_url,
proxies=proxyMeta,
timeout=self.proxy_timeout,
verify=False,
)
if response.status_code == 200:
return True
logger.warning(
f"带认证的代理验证失败,状态码: {response.status_code},重试次数: {attempt + 1},代理: {proxyMeta}"
)
except Exception as e:
logger.warning(
f"带认证的代理验证出错: {str(e)},代理: {proxyMeta},重试次数: {attempt + 1}"
)
return False
class DefaultProxyPool(BaseProxyPool):
"""默认代理池实现"""
def __init__(self):
super().__init__()
self.order_proxy_map: Dict[str, str] = {} # 订单ID -> 代理地址
self.proxy_order_count_map: Dict[str, int] = {} # 代理地址 -> 使用计数
self.max_orders_per_proxy = 5 # 一个代理最多可在5个订单中使用
def get_proxy(self, order_id: str = "") -> Optional[str]:
"""获取指定订单的代理
Args:
order_id: 订单ID
Returns:
代理地址,格式为 http://user:pass@ip:port如果获取失败则返回None
"""
with self.lock: # 使用线程锁确保并发安全
# 检查是否已有分配的代理
if order_id in self.order_proxy_map:
proxy = self.order_proxy_map[order_id]
if self._validate_proxy_with_auth(proxy):
return proxy
else:
# 代理无效,清理相关映射
self._cleanup_proxy_mapping(proxy)
del self.order_proxy_map[order_id]
# 尝试复用现有代理(使用次数未达上限)
for proxy, count in self.proxy_order_count_map.items():
if count < self.max_orders_per_proxy and self._validate_proxy_with_auth(proxy):
# 复用此代理
self.order_proxy_map[order_id] = proxy
self.proxy_order_count_map[proxy] = count + 1
logger.info(f"订单 {order_id} 复用代理 {proxy},当前使用次数: {count + 1}")
return proxy
# 获取新代理
try:
proxy = self._get_new_proxy()
if proxy is not None:
self.order_proxy_map[order_id] = proxy
self.proxy_order_count_map[proxy] = 1
logger.info(f"订单 {order_id} 获取新代理 {proxy}")
return proxy
except Exception as e:
logger.error(f"获取代理失败: {str(e)}")
return None
def release_proxy(self, order_id: str):
"""释放指定订单的代理
Args:
order_id: 订单ID
"""
with self.lock: # 使用线程锁确保并发安全
if order_id in self.order_proxy_map:
proxy = self.order_proxy_map[order_id]
del self.order_proxy_map[order_id]
# 减少代理使用计数
if proxy in self.proxy_order_count_map:
self.proxy_order_count_map[proxy] -= 1
if self.proxy_order_count_map[proxy] <= 0:
del self.proxy_order_count_map[proxy]
logger.info(f"代理 {proxy} 使用计数归零,已从计数映射中删除")
else:
logger.info(f"订单 {order_id} 释放代理 {proxy},剩余使用次数: {self.proxy_order_count_map[proxy]}")
logger.info(f"订单 {order_id} 已释放代理 {proxy}")
def _cleanup_proxy_mapping(self, proxy: str):
"""清理代理相关的所有映射
Args:
proxy: 要清理的代理地址
"""
if proxy in self.proxy_order_count_map:
del self.proxy_order_count_map[proxy]
logger.info(f"已清理代理 {proxy} 的计数映射")
def mark_proxy_invalid(self, proxy: str):
"""标记代理为无效
Args:
proxy: 要标记为无效的代理地址
"""
self.remove_invalid_proxy(proxy)
def remove_invalid_proxy(self, proxy: str):
"""删除指定的无效代理
Args:
proxy: 要删除的代理地址
"""
with self.lock:
# 查找并删除使用该代理的所有订单映射
orders_to_remove = []
for order_id, proxy_addr in self.order_proxy_map.items():
if proxy_addr == proxy:
orders_to_remove.append(order_id)
# 删除找到的订单映射
for order_id in orders_to_remove:
del self.order_proxy_map[order_id]
logger.info(f"已删除订单 {order_id} 的无效代理 {proxy}")
# 清理代理的计数映射
self._cleanup_proxy_mapping(proxy)
if not orders_to_remove:
logger.warning(f"未找到使用代理 {proxy} 的订单,可能已被删除")
def _get_new_proxy(self) -> Optional[str]:
"""获取新的代理最多尝试3次
Returns:
代理地址,格式为 http://user:pass@ip:port
Raises:
Exception: 连续3次获取代理失败时抛出异常
"""
max_attempts = 2 # 最大尝试次数
for attempt in range(max_attempts):
try:
res = requests.get(
settings.proxy_url,
timeout=self.proxy_timeout,
)
ip = res.text.strip()
if ip and self._validate_proxy_with_auth(ip):
# 构建带认证信息的代理地址
proxyMeta = "http://%(user)s:%(pass)s@%(host)s" % {
"host": ip,
"user": settings.proxy_username,
"pass": settings.proxy_password,
}
return proxyMeta
logger.warning(
f"获取代理失败或代理验证失败,尝试次数: {attempt + 1}/{max_attempts}"
)
except Exception as e:
logger.error(
f"获取代理出错: {str(e)},尝试次数: {attempt + 1}/{max_attempts}"
)
# 所有尝试都失败,抛出异常
raise Exception(f"连续{max_attempts}次获取代理失败")
def get_all_proxies(self) -> Dict[str, str]:
"""获取所有订单的代理映射
Returns:
Dict[str, str]: 订单ID到代理地址的映射
"""
with self.lock: # 使用线程锁确保并发安全
return self.order_proxy_map.copy()
def get_proxy_usage_stats(self) -> Dict[str, int]:
"""获取代理使用统计信息
Returns:
Dict[str, int]: 代理地址到使用次数的映射
"""
with self.lock: # 使用线程锁确保并发安全
return self.proxy_order_count_map.copy()
def set_max_orders_per_proxy(self, max_orders: int):
"""设置一个代理最多可使用的订单数
Args:
max_orders: 最大订单数量
"""
with self.lock:
self.max_orders_per_proxy = max_orders
logger.info(f"代理最大使用次数已设置为: {max_orders}")
class ExpiringProxyPool(BaseProxyPool):
"""带有效期的代理池实现"""
def __init__(self, expire_time: int = 60):
super().__init__()
self.current_proxy: Optional[Tuple[str, float]] = None # 当前代理及其过期时间
self.proxy_order_count_map = Counter() # 代理使用计数
self.expire_time = expire_time # 代理有效期
self.invalid_proxies: set = set() # 存储无效的代理
self.max_orders_per_proxy = 5 # 一个代理最多可在5个订单中使用
def get_proxy(self, order_id: str = "") -> Optional[str]:
"""获取指定订单的代理
Args:
order_id: 订单ID
Returns:
代理地址,格式为 http://user:pass@ip:port如果获取失败则返回None
"""
with self.lock: # 使用线程锁确保并发安全
# 检查当前代理是否有效且未过期
if self.current_proxy:
proxy, expire_time = self.current_proxy
# 如果代理未过期且不在无效列表中,则返回
if (
time.time() < expire_time
and proxy not in self.invalid_proxies
and self._validate_proxy_with_auth(proxy)
and self.proxy_order_count_map[proxy] < self.max_orders_per_proxy
):
self.proxy_order_count_map[proxy] += 1
return proxy
# 获取新代理
try:
proxy = self._get_new_proxy()
if proxy:
# 设置代理过期时间
expire_time = time.time() + self.expire_time
self.current_proxy = (proxy, expire_time)
return proxy
except Exception as e:
logger.error(f"获取代理失败: {str(e)}")
return None
return None
def release_proxy(self, *args, **kwargs):
"""释放代理(此实现不需要)"""
pass
def _get_new_proxy(self) -> Optional[str]:
"""获取新的代理最多尝试3次
Returns:
代理地址,格式为 http://user:pass@ip:port
Raises:
Exception: 连续3次获取代理失败时抛出异常
"""
max_attempts = 2 # 最大尝试次数
logger.info(
f"获取代理{settings.proxy_url} {settings.proxy_username} {settings.proxy_password}"
)
for attempt in range(max_attempts):
try:
res = requests.get(
settings.proxy_url,
timeout=self.proxy_timeout,
)
ip = res.text.strip()
logger.info(f"获取代理:{ip}")
if ip and self._validate_proxy_with_auth(ip):
# 构建带认证信息的代理地址
proxyMeta = "http://%(user)s:%(pass)s@%(host)s" % {
"host": ip,
"user": settings.proxy_username,
"pass": settings.proxy_password,
}
# 检查新获取的代理是否在无效列表中
if proxyMeta not in self.invalid_proxies:
return proxyMeta
logger.warning(
f"获取的代理 {proxyMeta} 在无效列表中,继续尝试获取新代理"
)
logger.warning(
f"获取代理失败或代理验证失败,尝试次数: {attempt + 1}/{max_attempts}"
)
except Exception as e:
logger.error(
f"获取代理出错: {str(e)},尝试次数: {attempt + 1}/{max_attempts}"
)
# 所有尝试都失败,抛出异常
raise Exception(f"连续{max_attempts}次获取代理失败")
def mark_proxy_invalid(self, proxy: str):
"""标记代理为无效
Args:
proxy: 要标记为无效的代理地址
"""
with self.lock:
# 添加到无效代理列表
self.invalid_proxies.add(proxy)
# 如果当前代理被标记为无效,立即清除
if self.current_proxy and self.current_proxy[0] == proxy:
self.current_proxy = None
logger.info(f"代理 {proxy} 已被标记为无效")
def clear_invalid_proxies(self):
"""清除无效代理列表
用于定期清理无效代理列表,避免内存占用过大
"""
with self.lock:
self.invalid_proxies.clear()
logger.info("无效代理列表已清除")
def remove_invalid_proxy(self, proxy: str):
"""删除指定的无效代理
Args:
proxy: 要删除的代理地址
"""
with self.lock:
# 从无效代理列表中删除该代理(如果存在)
if proxy in self.invalid_proxies:
self.invalid_proxies.remove(proxy)
logger.info(f"已从无效代理列表中删除代理 {proxy}")
# 如果当前代理就是要删除的代理,也清除当前代理
if self.current_proxy and self.current_proxy[0] == proxy:
self.current_proxy = None
logger.info(f"已清除当前无效代理 {proxy}")
def set_expire_time(self, seconds: int):
"""设置代理有效期
Args:
seconds: 代理有效期,单位秒
"""
with self.lock:
self.expire_time = seconds
class ProxyPoolFactory:
"""代理池工厂类"""
_instances: Dict[ProxyPoolType, BaseProxyPool] = {}
_lock = threading.Lock()
@classmethod
def get_proxy_pool(
cls, pool_type: ProxyPoolType = ProxyPoolType.DEFAULT, **kwargs
) -> BaseProxyPool:
"""获取代理池实例
Args:
pool_type: 代理池类型
**kwargs: 代理池初始化参数
Returns:
BaseProxyPool: 代理池实例
"""
with cls._lock:
if pool_type not in cls._instances:
if pool_type == ProxyPoolType.DEFAULT:
cls._instances[pool_type] = DefaultProxyPool()
elif pool_type == ProxyPoolType.EXPIRING:
expire_time = kwargs.get("expire_time", 60)
cls._instances[pool_type] = ExpiringProxyPool(
expire_time=expire_time
)
return cls._instances[pool_type]
# 使用示例
if __name__ == "__main__":
# 获取默认代理池
# default_pool = ProxyPoolFactory.get_proxy_pool()
# print(default_pool.get_proxy("test_order"))
# 获取带有效期的代理池
for i in range(10):
expiring_pool = ProxyPoolFactory.get_proxy_pool(
ProxyPoolType.EXPIRING, expire_time=60
)
proxy = expiring_pool.get_proxy(order_id="test_order")
print(proxy)
time.sleep(2)