from collections import Counter import threading import time from abc import ABC, abstractmethod from typing import Optional, Dict, Tuple from curl_cffi import ProxySpec, requests from core.config import ProxyPoolType, settings from observability.logging import get_logger_with_trace # from app.config.config import Config # from app.logging.logger import get_logger # from app.proxy_pool.enums import ProxyPoolType # 禁用 HTTPS 请求的警告 logger = get_logger_with_trace(__name__) class BaseProxyPool(ABC): """代理池抽象基类""" def __init__(self): self.proxy_timeout = 3 # 代理超时时间,单位秒 self.test_url = "https://www.baidu.com" self.max_retries = 1 # 最大重试次数 self.lock = threading.Lock() # 添加线程锁以确保并发安全 @abstractmethod def get_proxy(self, *args, **kwargs) -> Optional[str]: """获取代理的抽象方法""" pass @abstractmethod def release_proxy(self, *args, **kwargs): """释放代理的抽象方法""" pass @abstractmethod def _get_new_proxy(self) -> Optional[str]: """获取新的代理的抽象方法""" pass @abstractmethod def mark_proxy_invalid(self, proxy: str): """标记代理为无效的抽象方法""" pass @abstractmethod def remove_invalid_proxy(self, proxy: str): """删除指定的无效代理的抽象方法""" pass def _validate_proxy_with_auth(self, proxy: str) -> bool: """验证带认证信息的代理是否可用 Args: proxy: 代理地址,可能是纯IP地址或带认证信息的完整代理地址 Returns: bool: 代理是否可用 """ # 检查是否已经是带认证信息的代理地址 if proxy.startswith("http://") and "@" in proxy: proxyMeta = ProxySpec(all=proxy) else: # 如果是纯IP地址,添加认证信息 proxyMeta: ProxySpec = ProxySpec(all="http://%(user)s:%(pass)s@%(host)s" % { "host": proxy, "user": settings.proxy_username, "pass": settings.proxy_password, }) for attempt in range(self.max_retries): try: response = requests.get( self.test_url, proxies=proxyMeta, timeout=self.proxy_timeout, verify=False, ) if response.status_code == 200: return True logger.warning( f"带认证的代理验证失败,状态码: {response.status_code},重试次数: {attempt + 1},代理: {proxyMeta}" ) except Exception as e: logger.warning( f"带认证的代理验证出错: {str(e)},代理: {proxyMeta},重试次数: {attempt + 1}" ) return False class DefaultProxyPool(BaseProxyPool): """默认代理池实现""" def __init__(self): super().__init__() self.order_proxy_map: Dict[str, str] = {} # 订单ID -> 代理地址 self.proxy_order_count_map: Dict[str, int] = {} # 代理地址 -> 使用计数 self.max_orders_per_proxy = 5 # 一个代理最多可在5个订单中使用 def get_proxy(self, order_id: str = "") -> Optional[str]: """获取指定订单的代理 Args: order_id: 订单ID Returns: 代理地址,格式为 http://user:pass@ip:port,如果获取失败则返回None """ with self.lock: # 使用线程锁确保并发安全 # 检查是否已有分配的代理 if order_id in self.order_proxy_map: proxy = self.order_proxy_map[order_id] if self._validate_proxy_with_auth(proxy): return proxy else: # 代理无效,清理相关映射 self._cleanup_proxy_mapping(proxy) del self.order_proxy_map[order_id] # 尝试复用现有代理(使用次数未达上限) for proxy, count in self.proxy_order_count_map.items(): if count < self.max_orders_per_proxy and self._validate_proxy_with_auth(proxy): # 复用此代理 self.order_proxy_map[order_id] = proxy self.proxy_order_count_map[proxy] = count + 1 logger.info(f"订单 {order_id} 复用代理 {proxy},当前使用次数: {count + 1}") return proxy # 获取新代理 try: proxy = self._get_new_proxy() if proxy is not None: self.order_proxy_map[order_id] = proxy self.proxy_order_count_map[proxy] = 1 logger.info(f"订单 {order_id} 获取新代理 {proxy}") return proxy except Exception as e: logger.error(f"获取代理失败: {str(e)}") return None def release_proxy(self, order_id: str): """释放指定订单的代理 Args: order_id: 订单ID """ with self.lock: # 使用线程锁确保并发安全 if order_id in self.order_proxy_map: proxy = self.order_proxy_map[order_id] del self.order_proxy_map[order_id] # 减少代理使用计数 if proxy in self.proxy_order_count_map: self.proxy_order_count_map[proxy] -= 1 if self.proxy_order_count_map[proxy] <= 0: del self.proxy_order_count_map[proxy] logger.info(f"代理 {proxy} 使用计数归零,已从计数映射中删除") else: logger.info(f"订单 {order_id} 释放代理 {proxy},剩余使用次数: {self.proxy_order_count_map[proxy]}") logger.info(f"订单 {order_id} 已释放代理 {proxy}") def _cleanup_proxy_mapping(self, proxy: str): """清理代理相关的所有映射 Args: proxy: 要清理的代理地址 """ if proxy in self.proxy_order_count_map: del self.proxy_order_count_map[proxy] logger.info(f"已清理代理 {proxy} 的计数映射") def mark_proxy_invalid(self, proxy: str): """标记代理为无效 Args: proxy: 要标记为无效的代理地址 """ self.remove_invalid_proxy(proxy) def remove_invalid_proxy(self, proxy: str): """删除指定的无效代理 Args: proxy: 要删除的代理地址 """ with self.lock: # 查找并删除使用该代理的所有订单映射 orders_to_remove = [] for order_id, proxy_addr in self.order_proxy_map.items(): if proxy_addr == proxy: orders_to_remove.append(order_id) # 删除找到的订单映射 for order_id in orders_to_remove: del self.order_proxy_map[order_id] logger.info(f"已删除订单 {order_id} 的无效代理 {proxy}") # 清理代理的计数映射 self._cleanup_proxy_mapping(proxy) if not orders_to_remove: logger.warning(f"未找到使用代理 {proxy} 的订单,可能已被删除") def _get_new_proxy(self) -> Optional[str]: """获取新的代理,最多尝试3次 Returns: 代理地址,格式为 http://user:pass@ip:port Raises: Exception: 连续3次获取代理失败时抛出异常 """ max_attempts = 2 # 最大尝试次数 for attempt in range(max_attempts): try: res = requests.get( settings.proxy_url, timeout=self.proxy_timeout, ) ip = res.text.strip() if ip and self._validate_proxy_with_auth(ip): # 构建带认证信息的代理地址 proxyMeta = "http://%(user)s:%(pass)s@%(host)s" % { "host": ip, "user": settings.proxy_username, "pass": settings.proxy_password, } return proxyMeta logger.warning( f"获取代理失败或代理验证失败,尝试次数: {attempt + 1}/{max_attempts}" ) except Exception as e: logger.error( f"获取代理出错: {str(e)},尝试次数: {attempt + 1}/{max_attempts}" ) # 所有尝试都失败,抛出异常 raise Exception(f"连续{max_attempts}次获取代理失败") def get_all_proxies(self) -> Dict[str, str]: """获取所有订单的代理映射 Returns: Dict[str, str]: 订单ID到代理地址的映射 """ with self.lock: # 使用线程锁确保并发安全 return self.order_proxy_map.copy() def get_proxy_usage_stats(self) -> Dict[str, int]: """获取代理使用统计信息 Returns: Dict[str, int]: 代理地址到使用次数的映射 """ with self.lock: # 使用线程锁确保并发安全 return self.proxy_order_count_map.copy() def set_max_orders_per_proxy(self, max_orders: int): """设置一个代理最多可使用的订单数 Args: max_orders: 最大订单数量 """ with self.lock: self.max_orders_per_proxy = max_orders logger.info(f"代理最大使用次数已设置为: {max_orders}") class ExpiringProxyPool(BaseProxyPool): """带有效期的代理池实现""" def __init__(self, expire_time: int = 60): super().__init__() self.current_proxy: Optional[Tuple[str, float]] = None # 当前代理及其过期时间 self.proxy_order_count_map = Counter() # 代理使用计数 self.expire_time = expire_time # 代理有效期 self.invalid_proxies: set = set() # 存储无效的代理 self.max_orders_per_proxy = 5 # 一个代理最多可在5个订单中使用 def get_proxy(self, order_id: str = "") -> Optional[str]: """获取指定订单的代理 Args: order_id: 订单ID Returns: 代理地址,格式为 http://user:pass@ip:port,如果获取失败则返回None """ with self.lock: # 使用线程锁确保并发安全 # 检查当前代理是否有效且未过期 if self.current_proxy: proxy, expire_time = self.current_proxy # 如果代理未过期且不在无效列表中,则返回 if ( time.time() < expire_time and proxy not in self.invalid_proxies and self._validate_proxy_with_auth(proxy) and self.proxy_order_count_map[proxy] < self.max_orders_per_proxy ): self.proxy_order_count_map[proxy] += 1 return proxy # 获取新代理 try: proxy = self._get_new_proxy() if proxy: # 设置代理过期时间 expire_time = time.time() + self.expire_time self.current_proxy = (proxy, expire_time) return proxy except Exception as e: logger.error(f"获取代理失败: {str(e)}") return None return None def release_proxy(self, *args, **kwargs): """释放代理(此实现不需要)""" pass def _get_new_proxy(self) -> Optional[str]: """获取新的代理,最多尝试3次 Returns: 代理地址,格式为 http://user:pass@ip:port Raises: Exception: 连续3次获取代理失败时抛出异常 """ max_attempts = 2 # 最大尝试次数 logger.info( f"获取代理{settings.proxy_url} {settings.proxy_username} {settings.proxy_password}" ) for attempt in range(max_attempts): try: res = requests.get( settings.proxy_url, timeout=self.proxy_timeout, ) ip = res.text.strip() logger.info(f"获取代理:{ip}") if ip and self._validate_proxy_with_auth(ip): # 构建带认证信息的代理地址 proxyMeta = "http://%(user)s:%(pass)s@%(host)s" % { "host": ip, "user": settings.proxy_username, "pass": settings.proxy_password, } # 检查新获取的代理是否在无效列表中 if proxyMeta not in self.invalid_proxies: return proxyMeta logger.warning( f"获取的代理 {proxyMeta} 在无效列表中,继续尝试获取新代理" ) logger.warning( f"获取代理失败或代理验证失败,尝试次数: {attempt + 1}/{max_attempts}" ) except Exception as e: logger.error( f"获取代理出错: {str(e)},尝试次数: {attempt + 1}/{max_attempts}" ) # 所有尝试都失败,抛出异常 raise Exception(f"连续{max_attempts}次获取代理失败") def mark_proxy_invalid(self, proxy: str): """标记代理为无效 Args: proxy: 要标记为无效的代理地址 """ with self.lock: # 添加到无效代理列表 self.invalid_proxies.add(proxy) # 如果当前代理被标记为无效,立即清除 if self.current_proxy and self.current_proxy[0] == proxy: self.current_proxy = None logger.info(f"代理 {proxy} 已被标记为无效") def clear_invalid_proxies(self): """清除无效代理列表 用于定期清理无效代理列表,避免内存占用过大 """ with self.lock: self.invalid_proxies.clear() logger.info("无效代理列表已清除") def remove_invalid_proxy(self, proxy: str): """删除指定的无效代理 Args: proxy: 要删除的代理地址 """ with self.lock: # 从无效代理列表中删除该代理(如果存在) if proxy in self.invalid_proxies: self.invalid_proxies.remove(proxy) logger.info(f"已从无效代理列表中删除代理 {proxy}") # 如果当前代理就是要删除的代理,也清除当前代理 if self.current_proxy and self.current_proxy[0] == proxy: self.current_proxy = None logger.info(f"已清除当前无效代理 {proxy}") def set_expire_time(self, seconds: int): """设置代理有效期 Args: seconds: 代理有效期,单位秒 """ with self.lock: self.expire_time = seconds class ProxyPoolFactory: """代理池工厂类""" _instances: Dict[ProxyPoolType, BaseProxyPool] = {} _lock = threading.Lock() @classmethod def get_proxy_pool( cls, pool_type: ProxyPoolType = ProxyPoolType.DEFAULT, **kwargs ) -> BaseProxyPool: """获取代理池实例 Args: pool_type: 代理池类型 **kwargs: 代理池初始化参数 Returns: BaseProxyPool: 代理池实例 """ with cls._lock: if pool_type not in cls._instances: if pool_type == ProxyPoolType.DEFAULT: cls._instances[pool_type] = DefaultProxyPool() elif pool_type == ProxyPoolType.EXPIRING: expire_time = kwargs.get("expire_time", 60) cls._instances[pool_type] = ExpiringProxyPool( expire_time=expire_time ) return cls._instances[pool_type] # 使用示例 if __name__ == "__main__": # 获取默认代理池 # default_pool = ProxyPoolFactory.get_proxy_pool() # print(default_pool.get_proxy("test_order")) # 获取带有效期的代理池 for i in range(10): expiring_pool = ProxyPoolFactory.get_proxy_pool( ProxyPoolType.EXPIRING, expire_time=60 ) proxy = expiring_pool.get_proxy(order_id="test_order") print(proxy) time.sleep(2)