Skip to content

Commit

Permalink
feat: add xhs-api
Browse files Browse the repository at this point in the history
  • Loading branch information
ReaJason committed Oct 15, 2023
1 parent 879ac5a commit 45e59ff
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 40 deletions.
7 changes: 3 additions & 4 deletions docs/basic.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,17 @@

进阶使用
----------------
将 playwright 封装为服务端,主函数使用 requests 请求,获取签名
将 playwright 封装为服务端,主函数使用 requests 请求该服务获取签名。多账号共用同一个签名服务时,请确保各账号 cookie 中的 a1 字段保持一致,否则签名会一直出现错误


环境安装
^^^^^^^^^^^^^^^^^^^^^^

可以直接使用 Docker 来起下面的 Flask 服务,然后使用 XhsClient 即可,注意端口变成了 8080
可以直接使用 Docker 来起下面的 Flask 服务,然后使用 XhsClient 即可,服务启动会打印 a1,推荐将自己的 cookie 中的 a1 与服务端设置成一致

.. code-block:: bash
docker run -it -d -p 8080:8080 reajason/xhs-sign:latest
docker run -it -d -p 5005:5005 reajason/xhs-api:latest
如果在本机启动 Flask 需要安装如下依赖:

Expand All @@ -50,6 +50,5 @@

使用 XhsClient
^^^^^^^^^^^^^^^^^^^
第一次请求会失败,但是之后的请求就正常了。

具体代码参考: `basic_sign_usage <https://github.com/ReaJason/xhs/blob/master/example/basic_sign_usage.py>`_
76 changes: 41 additions & 35 deletions example/basic_sign_server.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,48 @@
import time

from flask import Flask, request
from playwright.sync_api import sync_playwright
from gevent import monkey
import time

monkey.patch_all()

app = Flask(__name__)

def sign(uri, data=None, a1="", web_session=""):
    """Sign a request by evaluating the site's signing function in a browser.

    Every attempt starts Playwright, launches headless Chromium, opens the
    xiaohongshu home page, installs the caller's ``a1`` cookie, reloads the
    page and evaluates ``window._webmsxyw(uri, data)`` in it. Up to 10
    attempts are made.

    Args:
        uri: Request path handed to the page's signing function.
        data: Request payload handed to the signing function, or None.
        a1: Value stored as the ``a1`` cookie for ``.xiaohongshu.com``.
        web_session: Accepted for interface compatibility; not used here.

    Returns:
        dict: ``{"x-s": ..., "x-t": ...}`` where ``x-t`` is converted to str.

    Raises:
        Exception: when all attempts fail. The last underlying failure is
            chained as ``__cause__`` so the real reason is not lost.
    """
    last_error = None
    for _ in range(10):
        try:
            with sync_playwright() as playwright:
                stealth_js_path = "/Users/reajason/ReaJason/xhs/tests/stealth.min.js"
                chromium = playwright.chromium

                # If signing keeps failing, set headless=False to watch the
                # browser; adding sleeps helps when inspecting its state.
                browser = chromium.launch(headless=True)

                browser_context = browser.new_context()
                browser_context.add_init_script(path=stealth_js_path)
                context_page = browser_context.new_page()
                context_page.goto("https://www.xiaohongshu.com")
                browser_context.add_cookies([
                    {'name': 'a1', 'value': a1, 'domain': ".xiaohongshu.com", 'path': "/"}]
                )
                context_page.reload()
                # After the cookie is set, signing fails without a short
                # pause here; lengthen it if failures are frequent.
                time.sleep(1)
                encrypt_params = context_page.evaluate("([url, data]) => window._webmsxyw(url, data)", [uri, data])
                return {
                    "x-s": encrypt_params["X-s"],
                    "x-t": str(encrypt_params["X-t"])
                }
        except Exception as error:
            # "window._webmsxyw is not a function" or an unexpected redirect
            # shows up now and then, so retry; remember the failure so the
            # final error can report what actually went wrong.
            last_error = error
    raise Exception("重试了这么多次还是无法签名成功,寄寄寄") from last_error


@app.route("/", methods=["POST"])

def get_context_page(instance, stealth_js_path, headless=True):
    """Launch Chromium and return a browser context with one new page.

    Args:
        instance: Started Playwright object; its ``chromium`` attribute is
            used to launch the browser.
        stealth_js_path: Path of the script registered on the context via
            ``add_init_script`` before the page is created.
        headless: Forwarded to ``launch``. Defaults to True, the value that
            used to be hard-coded; pass False to open a visible browser
            when debugging signing failures.

    Returns:
        tuple: ``(context, page)``.
    """
    browser = instance.chromium.launch(headless=headless)
    context = browser.new_context()
    # Register the init script before creating the page.
    context.add_init_script(path=stealth_js_path)
    return context, context.new_page()


# Change this to the location of your own stealth.min.js file.
stealth_js_path = "/Users/reajason/ReaJason/xhs/tests/stealth.min.js"

# Playwright is started once at import time; the resulting context and page
# are module globals used by sign().
print("正在启动 playwright")
playwright = sync_playwright().start()
browser_context, context_page = get_context_page(playwright, stealth_js_path)

# Open the home page, then reload it. The fixed pauses presumably give the
# page time to finish loading before its cookies are read -- confirm.
context_page.goto("https://www.xiaohongshu.com")
print("正在跳转至小红书首页")
time.sleep(5)
context_page.reload()
time.sleep(1)

# Print the a1 cookie the browser holds so clients can use the same value.
cookies = browser_context.cookies()
for cookie in cookies:
    if cookie["name"] != "a1":
        continue
    print("当前浏览器 cookie 中 a1 值为:" + cookie["value"] + ",请将需要使用的 a1 设置成一样方可签名成功")
print("跳转小红书首页成功,等待调用")


def sign(uri, data, a1, web_session):
    """Return the ``x-s`` / ``x-t`` signature for a request.

    The signature comes from evaluating ``window._webmsxyw(uri, data)`` in
    the module-level ``context_page``. ``a1`` and ``web_session`` are
    accepted but not used by this function.

    Returns:
        dict: ``{"x-s": ..., "x-t": ...}`` where ``x-t`` is converted to str.
    """
    signature = context_page.evaluate(
        "([url, data]) => window._webmsxyw(url, data)",
        [uri, data],
    )
    x_s, x_t = signature["X-s"], signature["X-t"]
    return {"x-s": x_s, "x-t": str(x_t)}


@app.route("/sign", methods=["POST"])
def hello_world():
json = request.json
uri = json["uri"]
Expand All @@ -47,4 +53,4 @@ def hello_world():


if __name__ == '__main__':
app.run(host="0.0.0.0", port=5006)
app.run(host="0.0.0.0", port=5005)
4 changes: 3 additions & 1 deletion example/basic_sign_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@

import requests

import xhs.help
from xhs import XhsClient


def sign(uri, data=None, a1="", web_session=""):
# 填写自己的 flask 签名服务端口地址
res = requests.post("http://localhost:5006",
res = requests.post("http://localhost:5005/sign",
json={"uri": uri, "data": data, "a1": a1, "web_session": web_session})
signs = res.json()
return {
Expand All @@ -24,3 +25,4 @@ def sign(uri, data=None, a1="", web_session=""):
note_info = xhs_client.get_note_by_id("63db8819000000001a01ead1")
print(datetime.datetime.now())
print(json.dumps(note_info, indent=2))
print(xhs.help.get_imgs_url_from_note(note_info))
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ pytest
pytest-cov
twine
build
Flask
playwright
23 changes: 23 additions & 0 deletions xhs-api/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
FROM --platform=$TARGETPLATFORM mcr.microsoft.com/playwright/python:v1.38.0-jammy

LABEL authors="ReaJason"
LABEL mail="[email protected]"

WORKDIR /app

# Install curl and remove the apt lists in the SAME layer; deleting them in a
# later layer does not shrink the image.
RUN set -ex \
    && apt-get update \
    && apt-get install -y --no-install-recommends curl \
    && rm -rf /var/lib/apt/lists/*

# reference -> https://playwright.dev/python/docs/ci#via-containers
# playwright is pinned to the base image's version: the image ships the
# browser builds for 1.38.0, and a different package version would look for
# builds that are not installed.
RUN python -m pip install --upgrade pip \
    && pip install Flask gevent xhs playwright==1.38.0

# --fail makes the build stop on an HTTP error instead of saving the error
# page as stealth.min.js.
# NOTE(review): --insecure disables TLS verification for a script that is
# injected into every page; drop it if the build environment has working CA
# certificates.
RUN curl --fail --insecure -L -o stealth.min.js https://cdn.jsdelivr.net/gh/requireCool/stealth.min.js/stealth.min.js

# Copied after the dependency layers so editing app.py does not invalidate them.
COPY app.py .

EXPOSE 5005

CMD [ "python", "-m" , "flask", "run", "--host=0.0.0.0", "--port=5005"]
12 changes: 12 additions & 0 deletions xhs-api/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
## 多架构构建

> 参考:https://yeasy.gitbook.io/docker_practice/buildx/multi-arch-images

```bash
docker buildx create --name mybuilder --driver docker-container

docker buildx use mybuilder

docker buildx build --platform linux/arm64,linux/amd64 -t reajason/xhs-api . --push
```
66 changes: 66 additions & 0 deletions xhs-api/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from flask import Flask, request
from playwright.sync_api import sync_playwright
from gevent import monkey
import time

# Apply gevent's monkey patching to the standard library.
# NOTE(review): this runs after flask and playwright have been imported;
# gevent recommends patching as early as possible -- confirm the ordering
# is intentional.
monkey.patch_all()

# Flask application object that the /sign route is registered on.
app = Flask(__name__)

# a1 cookie value the shared browser context currently holds; filled in at
# startup from the browser's cookies and updated by sign().
global_a1 = ""


def get_context_page(instance, stealth_js_path, headless=True):
    """Launch Chromium and return a browser context with one new page.

    Args:
        instance: Started Playwright object; its ``chromium`` attribute is
            used to launch the browser.
        stealth_js_path: Path of the script registered on the context via
            ``add_init_script`` before the page is created.
        headless: Forwarded to ``launch``. Defaults to True, the value that
            used to be hard-coded; pass False to open a visible browser
            when debugging signing failures.

    Returns:
        tuple: ``(context, page)``.
    """
    browser = instance.chromium.launch(headless=headless)
    context = browser.new_context()
    # Register the init script before creating the page.
    context.add_init_script(path=stealth_js_path)
    return context, context.new_page()


# Script registered as an init script on the browser context; the relative
# path is resolved against the process working directory.
stealth_js_path = "stealth.min.js"

# Playwright is started once at import time; the resulting context and page
# are module globals used by sign().
print("正在启动 playwright")
playwright = sync_playwright().start()
browser_context, context_page = get_context_page(playwright, stealth_js_path)

# Open the home page, then reload it. The fixed pauses presumably give the
# page time to finish loading before its cookies are read -- confirm.
context_page.goto("https://www.xiaohongshu.com")
print("正在跳转至小红书首页")
time.sleep(5)
context_page.reload()
time.sleep(1)

# Remember the a1 cookie the browser holds and print it so clients can use
# the same value.
cookies = browser_context.cookies()
for cookie in cookies:
    if cookie["name"] != "a1":
        continue
    global_a1 = cookie["value"]
    print("当前浏览器中 a1 值为:" + global_a1 + ",请将您的 cookie 中的 a1 也设置成一样,方可签名成功")
print("跳转小红书首页成功,等待调用")


def sign(uri, data, a1, web_session):
    """Return the ``x-s`` / ``x-t`` signature for a request.

    The signature comes from evaluating ``window._webmsxyw(uri, data)`` in
    the module-level ``context_page``. When the caller supplies an ``a1``
    that differs from the one recorded in ``global_a1``, the browser's
    ``a1`` cookie is replaced and the page reloaded before signing.

    Args:
        uri: Request path handed to the page's signing function.
        data: Request payload handed to the signing function.
        a1: Caller's ``a1`` cookie value. None or "" leaves the browser's
            current ``a1`` untouched.
        web_session: Accepted for interface compatibility; not used here.

    Returns:
        dict: ``{"x-s": ..., "x-t": ...}`` where ``x-t`` is converted to str.
    """
    global global_a1
    # Only sync when the caller actually sent an a1: a missing value (None
    # or "") is not a usable cookie and must not overwrite the browser's
    # working a1.
    if a1 and a1 != global_a1:
        browser_context.add_cookies([
            {'name': 'a1', 'value': a1, 'domain': ".xiaohongshu.com", 'path': "/"}
        ])
        context_page.reload()
        # Short pause after the reload before evaluating in the page.
        time.sleep(1)
        global_a1 = a1
    encrypt_params = context_page.evaluate("([url, data]) => window._webmsxyw(url, data)", [uri, data])
    return {
        "x-s": encrypt_params["X-s"],
        "x-t": str(encrypt_params["X-t"])
    }


@app.route("/sign", methods=["POST"])
def hello_world():
    """Handle POST /sign: read the JSON body and return sign()'s result.

    The body must contain the keys ``uri``, ``data``, ``a1`` and
    ``web_session``; they are looked up in that order and passed to
    ``sign`` positionally.
    """
    payload = request.json
    arguments = [payload[field] for field in ("uri", "data", "a1", "web_session")]
    return sign(*arguments)


# When executed as a script, serve the app on all interfaces, port 5005.
if __name__ == "__main__":
    app.run(
        host="0.0.0.0",
        port=5005,
    )

0 comments on commit 45e59ff

Please sign in to comment.