diff --git a/YYeTsFE b/YYeTsFE index d4117e9f4..9a90d07fa 160000 --- a/YYeTsFE +++ b/YYeTsFE @@ -1 +1 @@ -Subproject commit d4117e9f4230a0d53768299a0ff1ad90ec8b9a56 +Subproject commit 9a90d07fa16ad3b893d9cf256a3f409d1d8f5240 diff --git a/yyetsweb/common/utils.py b/yyetsweb/common/utils.py index 9b8cb58eb..b4aec69cf 100644 --- a/yyetsweb/common/utils.py +++ b/yyetsweb/common/utils.py @@ -12,12 +12,14 @@ import logging import os import pathlib +import re import smtplib import time from datetime import datetime from email.header import Header from email.mime.text import MIMEText from email.utils import formataddr, parseaddr +from hashlib import sha256 import coloredlogs import pytz @@ -36,6 +38,18 @@ def setup_logger(): ) +def hide_phone(data: list): + for item in data: + if item["username"].isdigit() and len(item["username"]) == 11: + item["hash"] = sha256(item["username"].encode("u8")).hexdigest() + item["username"] = mask_phone(item["username"]) + return data + + +def mask_phone(num): + return re.sub(r"(\d{3})\d{4}(\d{4})", r"\g<1>****\g<2>", num) + + def ts_date(ts=None): # Let's always set the timezone to CST timestamp = ts or time.time() diff --git a/yyetsweb/databases/base.py b/yyetsweb/databases/base.py index dfcae25cd..ff1267c4e 100644 --- a/yyetsweb/databases/base.py +++ b/yyetsweb/databases/base.py @@ -116,7 +116,7 @@ class SearchEngine(Mongo): "comment": "$content", "commentID": {"$toString": "$_id"}, "origin": "comment", - "hasAvatar": {"$toBool": "$avatar"}, + "hasAvatar": "yes", "resourceID": "$resource_id", "resourceName": {"$first": "$resource.data.info.cnname"}, "_id": {"$toString": "$_id"}, diff --git a/yyetsweb/databases/oauth.py b/yyetsweb/databases/oauth.py index af2181a89..c981ba18f 100644 --- a/yyetsweb/databases/oauth.py +++ b/yyetsweb/databases/oauth.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 # coding: utf-8 +from hashlib import sha256 + from common.utils import ts_date from databases.base import Mongo @@ -27,6 +29,7 @@ def add_user(self, username, ip, browser, uid, source: "str"): "oldUser": True, "source": source, "uid": uid, + "hash": sha256(username.encode("u8")).hexdigest(), } ) return { diff --git a/yyetsweb/databases/other.py b/yyetsweb/databases/other.py index ea42a7b52..bc8afbd67 100644 --- a/yyetsweb/databases/other.py +++ b/yyetsweb/databases/other.py @@ -8,6 +8,7 @@ import re import string import time +from hashlib import sha256 import pymongo import requests @@ -165,6 +166,15 @@ def import_ban_user(self): r.hset("user_blacklist", username, 100) r.close() + def fill_user_hash(self): + users = self.db["users"].find({"hash": {"$exists": False}}, projection={"username": True}) + # do it old school + for user in users: + logging.info("Filling hash for %s", user) + username = user["username"] + hash_value = sha256(username.encode("u8")).hexdigest() + self.db["users"].update_one({"username": username}, {"$set": {"hash": hash_value}}) + class Captcha(Redis): def get_captcha(self, captcha_id): diff --git a/yyetsweb/databases/resources.py b/yyetsweb/databases/resources.py index 63bcd52eb..a6b9f9b7e 100644 --- a/yyetsweb/databases/resources.py +++ b/yyetsweb/databases/resources.py @@ -11,7 +11,7 @@ import zhconv from tqdm import tqdm -from common.utils import ts_date +from common.utils import hide_phone, ts_date from databases.base import Mongo, Redis, SearchEngine from databases.comment import CommentSearch @@ -52,7 +52,7 @@ def meili_search(self, keyword: "str", search_type: "str") -> dict: returned = {"data": [], "comment": [], "extra": []} if search_type == "default": yyets = self.search_yyets(keyword) - comment = self.search_comment(keyword) + comment = hide_phone(self.search_comment(keyword)) returned["data"] = yyets returned["comment"] = comment return returned @@ -98,12 +98,13 @@ def mongodb_search(self, keyword: str) -> dict: zimuzu_data.append(item["data"]["info"]) # get comment - r = CommentSearch().get_comment(1, 2**10, keyword) c_search = [] - for c in r.get("data", []): + comments = CommentSearch().get_comment(1, 2**10, keyword) + hide_phone(comments.get("data", [])) + for c in comments.get("data", []): comment_rid = c["resource_id"] - d = self.db["yyets"].find_one({"data.info.id": comment_rid}, projection={"data.info": True}) - if d: + res = self.db["yyets"].find_one({"data.info.id": comment_rid}, projection={"data.info": True}) + if res: c_search.append( { "username": c["username"], @@ -111,9 +112,10 @@ def mongodb_search(self, keyword: str) -> dict: "comment": c["content"], "commentID": c["id"], "resourceID": comment_rid, - "resourceName": d["data"]["info"]["cnname"], + "resourceName": res["data"]["info"]["cnname"], "origin": "comment", "hasAvatar": c["hasAvatar"], + "hash": c["hash"], } ) # zimuzu -> comment -> extra diff --git a/yyetsweb/databases/user.py b/yyetsweb/databases/user.py index f6326c06f..ef253fa9a 100644 --- a/yyetsweb/databases/user.py +++ b/yyetsweb/databases/user.py @@ -3,10 +3,12 @@ import os import random import re +from hashlib import md5, sha256 from http import HTTPStatus import filetype import pymongo +import requests from passlib.handlers.pbkdf2 import pbkdf2_sha256 from common.utils import send_mail, ts_date @@ -101,6 +103,7 @@ def login_user( date=ts_date(), ip=ip, browser=browser, + hash=sha256(username.encode("u8")).hexdigest(), ) ) returned_value["status_code"] = HTTPStatus.CREATED @@ -177,11 +180,23 @@ def add_avatar(self, username, avatar): return {"status_code": HTTPStatus.CREATED, "message": "头像上传成功"} - def get_avatar(self, username): - user = self.db["users"].find_one({"username": username}) - img = user.get("avatar", b"") - mime = filetype.guess_mime(img) - return {"image": img, "content_type": mime} + def get_avatar(self, username, user_hash=None): + if user_hash: + user = self.db["users"].find_one({"hash": user_hash}) + else: + user = self.db["users"].find_one({"username": username}) + if user: + img = user.get("avatar", b"") + mime = filetype.guess_mime(img) + return {"image": img, "content_type": mime} + elif "@" in username: + # fallback to gravatar + url = f"https://gravatar.webp.se/avatar/{md5(username.encode('u8')).hexdigest()}" + img = requests.get(url).content + mime = filetype.guess_mime(img) + return {"image": img, "content_type": mime} + else: + return {"image": None, "content_type": None} class UserEmail(Mongo): diff --git a/yyetsweb/handlers/comment.py b/yyetsweb/handlers/comment.py index 8540ebbc5..48f6be423 100644 --- a/yyetsweb/handlers/comment.py +++ b/yyetsweb/handlers/comment.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 # coding: utf-8 -import re from http import HTTPStatus from pathlib import Path from tornado import gen, web from tornado.concurrent import run_on_executor +from common.utils import hide_phone from handlers.base import BaseHandler filename = Path(__file__).name.split(".")[0] @@ -15,13 +15,6 @@ class CommentHandler(BaseHandler): filename = filename - @staticmethod - def hide_phone(data: list): - for item in data: - if item["username"].isdigit() and len(item["username"]) == 11: - item["username"] = re.sub(r"(\d{3})\d{4}(\d{4})", r"\g<1>****\g<2>", item["username"]) - return data - @run_on_executor() def get_comment(self): query_id = self.get_argument("resource_id", "0") @@ -44,7 +37,7 @@ def get_comment(self): inner_page=inner_page, comment_id=comment_id, ) - self.hide_phone((comment_data["data"])) + hide_phone((comment_data["data"])) return comment_data @run_on_executor() @@ -144,7 +137,7 @@ def get_comment(self): self.set_status(HTTPStatus.BAD_REQUEST) return {"status": False, "message": "请提供 parent_id"} comment_data = self.instance.get_comment(parent_id, page, size) - self.hide_phone((comment_data["data"])) + hide_phone((comment_data["data"])) return comment_data @gen.coroutine @@ -162,7 +155,7 @@ def get_comment(self): page = int(self.get_argument("page", "1")) comment_data = self.instance.get_comment(page, size) - self.hide_phone((comment_data["data"])) + hide_phone((comment_data["data"])) return comment_data @gen.coroutine @@ -180,7 +173,7 @@ def search_comment(self): page = int(self.get_argument("page", "1")) keyword = self.get_argument("keyword", "") comment_data = self.instance.get_comment(page, size, keyword) - self.hide_phone((comment_data["data"])) + hide_phone((comment_data["data"])) return comment_data @gen.coroutine diff --git a/yyetsweb/handlers/resources.py b/yyetsweb/handlers/resources.py index 9a894fcde..c0fd3a6f0 100644 --- a/yyetsweb/handlers/resources.py +++ b/yyetsweb/handlers/resources.py @@ -19,7 +19,8 @@ class ResourceHandler(BaseHandler): @run_on_executor() def get_resource_data(self): - resource_id = int(self.get_query_argument("id")) + query = self.get_query_argument("id", None) + resource_id = int(query) if query.isdigit() else 0 username = self.get_current_user() if str(resource_id) in os.getenv("HIDDEN_RESOURCE", "").split(","): self.set_status(HTTPStatus.NOT_FOUND) diff --git a/yyetsweb/handlers/user.py b/yyetsweb/handlers/user.py index e3080f8ca..28ba76236 100644 --- a/yyetsweb/handlers/user.py +++ b/yyetsweb/handlers/user.py @@ -27,9 +27,7 @@ def login(self): ip = self.get_real_ip() browser = self.request.headers["user-agent"] - response = self.instance.login_user( - username, password, captcha, captcha_id, ip, browser - ) + response = self.instance.login_user(username, password, captcha, captcha_id, ip, browser) if response["status_code"] in (HTTPStatus.CREATED, HTTPStatus.OK): self.set_login(username) else: @@ -96,7 +94,8 @@ def update_avatar(self): @run_on_executor() def get_avatar(self, username): - data = self.instance.get_avatar(username) + user_hash = self.get_query_argument("hash", None) + data = self.instance.get_avatar(username, user_hash) if data["image"]: self.set_header("Content-Type", data["content_type"]) return data["image"] diff --git a/yyetsweb/server.py b/yyetsweb/server.py index 626137a16..6a42b1e47 100644 --- a/yyetsweb/server.py +++ b/yyetsweb/server.py @@ -167,6 +167,7 @@ def run_server(port, host): scheduler.add_job(sync_douban, trigger=CronTrigger.from_crontab("1 1 1 * *")) scheduler.add_job(entry_dump, trigger=CronTrigger.from_crontab("2 2 1 * *")) scheduler.add_job(Other().import_ban_user, "interval", seconds=300) + scheduler.add_job(Other().fill_user_hash, "interval", seconds=60) scheduler.add_job(Cloudflare().clear_fw, trigger=CronTrigger.from_crontab("0 0 */3 * *")) scheduler.add_job(YYSub().run, trigger=CronTrigger.from_crontab("0 1 * * *"))