diff --git a/docs/basic.rst b/docs/basic.rst index 1f2e5a9..ac6e608 100644 --- a/docs/basic.rst +++ b/docs/basic.rst @@ -25,17 +25,17 @@ 进阶使用 ---------------- -将 playwright 封装为服务端,主函数使用 requests 请求,获取签名。 +将 playwright 封装为服务端,主函数使用 requests 请求,获取签名,多账号使用统一签名服务请确保 cookie 中的 a1 字段统一,防止签名一直出现错误 环境安装 ^^^^^^^^^^^^^^^^^^^^^^ -可以直接使用 Docker 来起下面的 Flask 服务,然后使用 XhsClient 即可,注意端口变成了 8080 +可以直接使用 Docker 来起下面的 Flask 服务,然后使用 XhsClient 即可,服务启动会打印 a1,推荐将自己的 cookie 中的 a1 与服务端设置成一致 .. code-block:: bash - docker run -it -d -p 8080:8080 reajason/xhs-sign:latest + docker run -it -d -p 5005:5005 reajason/xhs-api:latest 如果在本机启动 Flask 需要安装如下依赖: @@ -50,6 +50,5 @@ 使用 XhsClient ^^^^^^^^^^^^^^^^^^^ -第一次请求会失败,但是之后的请求就正常了。 具体代码参考: `basic_sign_usage `_ diff --git a/example/basic_sign_server.py b/example/basic_sign_server.py index 7f22fae..894bd7d 100644 --- a/example/basic_sign_server.py +++ b/example/basic_sign_server.py @@ -1,42 +1,48 @@ -import time - from flask import Flask, request from playwright.sync_api import sync_playwright +from gevent import monkey +import time + +monkey.patch_all() app = Flask(__name__) -def sign(uri, data=None, a1="", web_session=""): - for _ in range(10): - try: - with sync_playwright() as playwright: - stealth_js_path = "/Users/reajason/ReaJason/xhs/tests/stealth.min.js" - chromium = playwright.chromium - - # 如果一直失败可尝试设置成 False 让其打开浏览器,适当添加 sleep 可查看浏览器状态 - browser = chromium.launch(headless=True) - - browser_context = browser.new_context() - browser_context.add_init_script(path=stealth_js_path) - context_page = browser_context.new_page() - context_page.goto("https://www.xiaohongshu.com") - browser_context.add_cookies([ - {'name': 'a1', 'value': a1, 'domain': ".xiaohongshu.com", 'path': "/"}] - ) - context_page.reload() - # 这个地方设置完浏览器 cookie 之后,如果这儿不 sleep 一下签名获取就失败了,如果经常失败请设置长一点试试 - time.sleep(1) - encrypt_params = context_page.evaluate("([url, data]) => window._webmsxyw(url, data)", [uri, data]) - return { - "x-s": encrypt_params["X-s"], - "x-t": 
str(encrypt_params["X-t"]) - } - except Exception: - # 这儿有时会出现 window._webmsxyw is not a function 或未知跳转错误,因此加一个失败重试趴 - pass - raise Exception("重试了这么多次还是无法签名成功,寄寄寄") - - -@app.route("/", methods=["POST"]) + +def get_context_page(instance, stealth_js_path): + chromium = instance.chromium + browser = chromium.launch(headless=True) + context = browser.new_context() + context.add_init_script(path=stealth_js_path) + page = context.new_page() + return context, page + + +# 如下更改为 stealth.min.js 文件路径地址 +stealth_js_path = "/Users/reajason/ReaJason/xhs/tests/stealth.min.js" +print("正在启动 playwright") +playwright = sync_playwright().start() +browser_context, context_page = get_context_page(playwright, stealth_js_path) +context_page.goto("https://www.xiaohongshu.com") +print("正在跳转至小红书首页") +time.sleep(5) +context_page.reload() +time.sleep(1) +cookies = browser_context.cookies() +for cookie in cookies: + if cookie["name"] == "a1": + print("当前浏览器 cookie 中 a1 值为:" + cookie["value"] + ",请将需要使用的 a1 设置成一样方可签名成功") +print("跳转小红书首页成功,等待调用") + + +def sign(uri, data, a1, web_session): + encrypt_params = context_page.evaluate("([url, data]) => window._webmsxyw(url, data)", [uri, data]) + return { + "x-s": encrypt_params["X-s"], + "x-t": str(encrypt_params["X-t"]) + } + + +@app.route("/sign", methods=["POST"]) def hello_world(): json = request.json uri = json["uri"] @@ -47,4 +53,4 @@ def hello_world(): if __name__ == '__main__': - app.run(host="0.0.0.0", port=5006) + app.run(host="0.0.0.0", port=5005) diff --git a/example/basic_sign_usage.py b/example/basic_sign_usage.py index 5331ec4..264538a 100644 --- a/example/basic_sign_usage.py +++ b/example/basic_sign_usage.py @@ -3,12 +3,13 @@ import requests +import xhs.help from xhs import XhsClient def sign(uri, data=None, a1="", web_session=""): # 填写自己的 flask 签名服务端口地址 - res = requests.post("http://localhost:5006", + res = requests.post("http://localhost:5005/sign", json={"uri": uri, "data": data, "a1": a1, "web_session": web_session}) signs = 
res.json() return { @@ -24,3 +25,4 @@ def sign(uri, data=None, a1="", web_session=""): note_info = xhs_client.get_note_by_id("63db8819000000001a01ead1") print(datetime.datetime.now()) print(json.dumps(note_info, indent=2)) + print(xhs.help.get_imgs_url_from_note(note_info)) diff --git a/requirements.txt b/requirements.txt index b441752..5cd6b47 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,5 @@ pytest pytest-cov twine build +Flask +playwright diff --git a/xhs-api/Dockerfile b/xhs-api/Dockerfile new file mode 100644 index 0000000..b701f13 --- /dev/null +++ b/xhs-api/Dockerfile @@ -0,0 +1,23 @@ +FROM --platform=$TARGETPLATFORM mcr.microsoft.com/playwright/python:v1.38.0-jammy + +LABEL authors="ReaJason" +LABEL mail="reajason1225@gmail.com" + +WORKDIR /app +COPY app.py . + +RUN set -ex \ + && apt-get update \ + && apt-get install -y --no-install-recommends curl + + +# reference -> https://playwright.dev/python/docs/ci#via-containers +RUN python -m pip install --upgrade pip \ + && pip install Flask gevent xhs playwright \ + && rm -rf /var/lib/apt/lists/* + +RUN curl --insecure -L -o stealth.min.js https://cdn.jsdelivr.net/gh/requireCool/stealth.min.js/stealth.min.js + +EXPOSE 5005 + +CMD [ "python", "-m" , "flask", "run", "--host=0.0.0.0", "--port=5005"] diff --git a/xhs-api/README.md b/xhs-api/README.md new file mode 100644 index 0000000..70726df --- /dev/null +++ b/xhs-api/README.md @@ -0,0 +1,12 @@ +## 多架构构建 + +> 参考:https://yeasy.gitbook.io/docker_practice/buildx/multi-arch-images + + +```bash +docker buildx create --name mybuilder --driver docker-container + +docker buildx use mybuilder + +docker buildx build --platform linux/arm64,linux/amd64 -t reajason/xhs-api . 
--push +``` \ No newline at end of file diff --git a/xhs-api/app.py b/xhs-api/app.py new file mode 100644 index 0000000..e6a0e83 --- /dev/null +++ b/xhs-api/app.py @@ -0,0 +1,66 @@ +from flask import Flask, request +from playwright.sync_api import sync_playwright +from gevent import monkey +import time + +monkey.patch_all() + +app = Flask(__name__) + +global_a1 = "" + + +def get_context_page(instance, stealth_js_path): + chromium = instance.chromium + browser = chromium.launch(headless=True) + context = browser.new_context() + context.add_init_script(path=stealth_js_path) + page = context.new_page() + return context, page + + +stealth_js_path = "stealth.min.js" +print("正在启动 playwright") +playwright = sync_playwright().start() +browser_context, context_page = get_context_page(playwright, stealth_js_path) +context_page.goto("https://www.xiaohongshu.com") +print("正在跳转至小红书首页") +time.sleep(5) +context_page.reload() +time.sleep(1) +cookies = browser_context.cookies() +for cookie in cookies: + if cookie["name"] == "a1": + global_a1 = cookie["value"] + print("当前浏览器中 a1 值为:" + global_a1 + ",请将您的 cookie 中的 a1 也设置成一样,方可签名成功") +print("跳转小红书首页成功,等待调用") + + +def sign(uri, data, a1, web_session): + global global_a1 + if a1 != global_a1: + browser_context.add_cookies([ + {'name': 'a1', 'value': a1, 'domain': ".xiaohongshu.com", 'path': "/"} + ]) + context_page.reload() + time.sleep(1) + global_a1 = a1 + encrypt_params = context_page.evaluate("([url, data]) => window._webmsxyw(url, data)", [uri, data]) + return { + "x-s": encrypt_params["X-s"], + "x-t": str(encrypt_params["X-t"]) + } + + +@app.route("/sign", methods=["POST"]) +def hello_world(): + json = request.json + uri = json["uri"] + data = json["data"] + a1 = json["a1"] + web_session = json["web_session"] + return sign(uri, data, a1, web_session) + + +if __name__ == '__main__': + app.run(host="0.0.0.0", port=5005)