Files
kami_walmart_slide/spiders.py
danial 0fee89a585 ci: 更新环境变量获取逻辑并优化代码格式
- 在 config.py 中添加了对环境变量的空值检查
- 更新了 Dockerfile 中的环境变量设置
- 优化了 proxy_pool.py 和 spiders.py 中的代码格式
- 修复了部分代码中的小问题
2025-04-12 15:19:08 +08:00

240 lines
8.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import platform
import random
import sys
import redis
import requests
import traceback
from bs4 import BeautifulSoup
from logger import get_logger
logger = get_logger()
class BalanceSpider:
    """Query the balance of a gift card on the UPCard Walmart web portal.

    The lookup is a session-bound sequence of HTTP calls: open the query
    page, download the captcha image, submit the card number together with
    the recognised captcha text, then request and parse the balance page.

    NOTE(review): service addresses, the Redis password and the proxy
    credentials are hard-coded below; presumably they belong in
    configuration or environment variables - confirm with the owner.
    """

    # Status codes returned by run().
    _CODE_SUCCESS = 2000
    _CODE_FAILED = 20016
    _CODE_CARD_NOT_FOUND = 20017

    # Seconds to wait on each request made by the proxy smoke test.
    _PROXY_TEST_TIMEOUT = 10

    def __init__(self, card_num):
        """Set up the HTTP session, endpoints and Redis connection.

        Args:
            card_num: Card number as a string. It is sliced into four
                groups (4/4/4/rest) when the query form is submitted.
        """
        self.card_num = card_num
        self.session = requests.Session()
        self.headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36",
            "sec-ch-ua": '"Chromium";v="128", "Not;A=Brand";v="24", "Google Chrome";v="128"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
        }
        self.cookies = {
            "JSESSIONID": "11135983F14E3998362F999181C47C11",
            "route": "1b5f48f4adb644d219f3f9f868ac958d",
        }
        self.url = "https://www.upcard.com.cn:8091/chinaloyalty/walmart/qrybaltxn.html"
        self.code_url = "https://www.upcard.com.cn:8091/webqry/walmart/image.jsp"
        self.proxy_url = ""
        self.proxies = {}
        self.timeout = 2.5

        # Linux hosts talk to 10.0.0.211 for both the captcha service and
        # Redis; any other OS uses a local captcha service and a different
        # Redis host.
        if platform.system() == "Linux":
            self.verify_url = "http://10.0.0.211:6800/api/v2/text/byte/verify/"
            redis_url = "redis://:jd2024@10.0.0.211:6379/0"
        else:
            self.verify_url = "http://127.0.0.1:6800/api/v2/text/byte/verify/"
            redis_url = "redis://:jd2024@120.79.27.250:6379/0"
        pool = redis.ConnectionPool.from_url(redis_url)
        self.redis_conn = redis.StrictRedis(connection_pool=pool)

    def _request_kwargs(self):
        """Return the keyword arguments shared by every portal request."""
        return {
            "headers": self.headers,
            "cookies": self.cookies,
            "proxies": self.proxies,
            "timeout": self.timeout,
        }

    def request_index(self):
        """GET the query page; the response is discarded."""
        self.session.get(self.url, **self._request_kwargs())

    def request_code(self):
        """Download the captcha image.

        Returns:
            The raw response body (bytes) of the captcha endpoint.
        """
        res = self.session.get(self.code_url, **self._request_kwargs())
        return res.content

    def request_next_index(self, code):
        """Submit the card number and captcha text to the query form.

        Args:
            code: Captcha text to send in the ``rand`` form field.
        """
        params = {"link": "next"}
        data = {
            "card1": self.card_num[0:4],
            "card2": self.card_num[4:8],
            "card3": self.card_num[8:12],
            "card4": self.card_num[12:],
            "rand": code,
        }
        self.session.post(
            self.url,
            params=params,
            data=data,
            **self._request_kwargs(),
        )

    def request_balance(self):
        """POST the balance query for the card.

        Returns:
            The response body as text (HTML of the balance page).
        """
        data = {"dateFrom": "", "dateTo": "", "cardNo": self.card_num}
        response = self.session.post(
            self.url,
            data=data,
            **self._request_kwargs(),
        )
        return response.text

    def verify_code(self, content):
        """Send the captcha image to the recognition service.

        Up to three attempts are made; an attempt that raises is logged
        and retried. The first response that parses is returned as-is,
        even if its ``code`` value is empty.

        Args:
            content: Captcha image bytes, posted as the request body.

        Returns:
            The ``data.code`` value of the service's JSON reply, or
            ``None`` when all attempts fail.
        """
        for _ in range(3):
            try:
                response = requests.post(self.verify_url, data=content, timeout=5)
                logger.info(f"订单号:{self.card_num},识别验证码返回:{response.text}")
                res = response.json()
                return res.get("data").get("code")
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not swallowed and retried.
            except Exception:
                logger.info(
                    f"订单号:{self.card_num},识别验证码异常:{traceback.format_exc()}"
                )
        return None

    def get_balance(self, html_content):
        """Extract the balance text from the balance page.

        Args:
            html_content: HTML of the balance page.

        Returns:
            The text following the last "余额:" marker inside the first
            element with class ``STYLE3`` (the whole element text if the
            marker is absent), or ``None`` if no such element exists.
        """
        soup = BeautifulSoup(html_content, "html.parser")
        element = soup.find(class_="STYLE3")
        if not element:
            return None
        return element.text.split("余额:")[-1]

    def get_proxy(self):
        """Load the proxy stored under the Redis key ``during_proxy``.

        The value is expected to be a JSON object with ``account``,
        ``password``, ``ip`` and ``port`` keys. The resulting URL is
        installed for both http and https in ``self.proxies``.

        Raises:
            AttributeError: If the key is missing (Redis returns ``None``).
            KeyError: If the JSON object lacks one of the expected keys.
        """
        res = self.redis_conn.get("during_proxy")
        proxy_info = json.loads(res.decode("utf-8"))
        ip = f"http://{proxy_info['account']}:{proxy_info['password']}@{proxy_info['ip']}:{proxy_info['port']}"
        self.proxies = {
            "http": ip,
            "https": ip,
        }
        logger.info(f"订单号:{self.card_num}get_proxy获取长效代理ip{self.proxies}")

    @staticmethod
    def xiongmao_proxy():
        """Smoke-test the dynamic proxy by fetching https://httpbin.org/ip.

        Prints the status code and body of the response; on a 301/302 it
        prints the ``Location`` header and repeats the request against it.

        Returns:
            The ``proxies`` mapping (http/https) that was used.
        """
        host = "dtqybf.xiongmaodaili.com"
        # Enterprise dynamic plan, billed by concurrency.
        port = 8091
        # Replace with the username from the order.
        user = "tnHS5nm5"
        # Replace with the password from the order.
        password = "yP6626fR"
        proxy_meta = f"http://{user}:{password}@{host}:{port}"
        proxy = {"http": proxy_meta, "https": proxy_meta}
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36"
        }

        def fetch(target):
            # NOTE(review): verify=False disables TLS certificate checks;
            # kept from the original behaviour - confirm it is intended.
            # The timeout stops a dead proxy from blocking forever.
            resp = requests.get(
                target,
                headers=headers,
                proxies=proxy,
                verify=False,
                allow_redirects=False,
                timeout=BalanceSpider._PROXY_TEST_TIMEOUT,
            )
            resp.encoding = "utf8"
            print(resp.status_code)
            print(resp.text)
            return resp

        r = fetch("https://httpbin.org/ip")
        if r.status_code in (301, 302):
            loc = r.headers["Location"]
            print(loc)
            fetch(loc)
        return proxy

    def get_proxy2(self):
        """Pick a random proxy from the Redis list ``proxy_list``.

        The chosen entry is installed for both http and https in
        ``self.proxies``.

        Raises:
            IndexError: If the list is empty.
        """
        candidates = [
            item.decode("utf-8")
            for item in self.redis_conn.lrange("proxy_list", 0, -1)
        ]
        ip = random.choice(candidates)
        self.proxies = {
            "http": ip,
            "https": ip,
        }
        logger.info(
            f"订单号:{self.card_num}get_proxy2获取短效代理ip{self.proxies}"
        )

    def run(self):
        """Run the full lookup, retrying the flow up to five times.

        The proxy is loaded once before the first attempt; exceptions
        raised while loading it propagate to the caller. Exceptions raised
        inside an attempt are logged and the attempt is retried.

        Returns:
            A ``(code, balance)`` tuple:

            * ``(2000, <balance text>)`` on success;
            * ``(20017, "0.0")`` when the page reports "卡号不存在"
              (card number does not exist);
            * ``(20016, "0.0")`` when no balance could be parsed or all
              attempts were used up.
        """
        self.get_proxy()
        for _ in range(5):
            try:
                self.request_index()
                code_content = self.request_code()
                verify_code = self.verify_code(code_content)
                if not verify_code:
                    logger.info(f"卡号:{self.card_num},验证码校验失败,返回空")
                    continue
                self.request_next_index(verify_code)
                balance_html_str = self.request_balance()
                logger.info(
                    f"订单号:{self.card_num}balance_html_str页面返回{balance_html_str}"
                )
                if "卡号不存在" in balance_html_str:
                    return self._CODE_CARD_NOT_FOUND, "0.0"
                # The portal asks for the captcha again: start over.
                if "请输入验证码" in balance_html_str:
                    continue
                balance = self.get_balance(balance_html_str)
                if not balance:
                    return self._CODE_FAILED, "0.0"
                return self._CODE_SUCCESS, balance
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit stop the run instead of triggering a retry.
            except Exception:
                logger.info(
                    f"卡号:{self.card_num},报错信息:{traceback.format_exc()}"
                )
                continue
        return self._CODE_FAILED, "0.0"
if __name__ == "__main__":
    # Manual check of the dynamic proxy. To query a card instead, build
    # BalanceSpider(card_num="<card number>") and print what run() returns.
    BalanceSpider.xiongmao_proxy()