From a2ad56576d0c55a6c9393202e29990f0d50f510b Mon Sep 17 00:00:00 2001 From: Braden Hilton Date: Sun, 11 Feb 2024 02:37:12 +0000 Subject: [PATCH] [weverse] add extractors --- docs/configuration.rst | 23 ++ docs/gallery-dl.conf | 7 + docs/supportedsites.md | 6 + gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/weverse.py | 598 +++++++++++++++++++++++++++++++ scripts/supportedsites.py | 8 + test/results/weverse.py | 216 +++++++++++ 7 files changed, 859 insertions(+) create mode 100644 gallery_dl/extractor/weverse.py create mode 100644 test/results/weverse.py diff --git a/docs/configuration.rst b/docs/configuration.rst index 08974379ec1..e719f5f6cd3 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -3936,6 +3936,29 @@ Description Download video files. +extractor.weverse.embeds +------------------------ +Type + ``bool`` +Default + ``true`` +Description + Control behavior on ``Media`` posts containing YouTube embeds. + + * ``true``: Extract embed URLs and download them. + * ``false``: Ignore embeds. + + +extractor.weverse.videos +------------------------ +Type + * ``bool`` +Default + ``true`` +Description + Download video files. + + extractor.ytdl.enabled ---------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 9f126524e32..1362a6c0915 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -366,6 +366,13 @@ "retweets": true, "videos": true }, + "weverse": { + "username": null, + "password": null, + "cookies": null, + "embeds": true, + "videos": true + }, "ytdl": { "enabled": false, diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 07aff3d9658..c423e97bf6e 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -985,6 +985,12 @@ Consider all listed sites to potentially be NSFW. Albums, Articles, Feeds, Images from Statuses, User Profiles, Videos + + Weverse + https://weverse.io/ + Feed Tab, Artist Tab, Media Files, Media Categories, Media Tabs, Member Profiles, Moments, Posts + Cookies + WikiArt.org https://www.wikiart.org/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index a6652497442..674a92d33d2 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -176,6 +176,7 @@ "webmshare", "webtoons", "weibo", + "weverse", "wikiart", "wikifeet", "wikimedia", diff --git a/gallery_dl/extractor/weverse.py b/gallery_dl/extractor/weverse.py new file mode 100644 index 00000000000..d57c269c51e --- /dev/null +++ b/gallery_dl/extractor/weverse.py @@ -0,0 +1,598 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://weverse.io/""" + +from .common import Extractor, Message +from .. import text, exception +from ..cache import cache +import binascii +import hashlib +import hmac +import time +import urllib.parse +import uuid +from collections import OrderedDict + +BASE_PATTERN = r"(?:https?://)?(?:m\.)?weverse\.io/([^/?#]+)" +MEMBER_ID_PATTERN = r"/([a-f0-9]+)" +POST_ID_PATTERN = r"/(\d-\d+)" + + +class WeverseExtractor(Extractor): + """Base class for weverse extractors""" + category = "weverse" + filename_fmt = "{category}_{id}.{extension}" + archive_fmt = "{category}_{post_id}_{id}" + cookies_domain = ".weverse.io" + cookies_names = ("we2_access_token",) + root = "https://weverse.io" + + def __init__(self, match): + Extractor.__init__(self, match) + self.community_keyword = match.group(1) + + def _init(self): + self.embeds = self.config("embeds", True) + self.videos = self.config("videos", True) + + def items(self): + self.login() + self.api = WeverseAPI(self) + + post = self.post() + published_at = text.parse_timestamp( + post["publishedAt"] / 1000) + data = { + "date" : published_at, + "post_url" : post["shareUrl"], + "post_id" : post["postId"], + "post_type" : post["postType"], + "section_type" : post["sectionType"], + "hide_from_artist": post["hideFromArtist"], + "membership_only" : post["membershipOnly"], + } + + if post.get("tags", []): + data["tags"] = post["tags"] + + if "author" in post: + author = { + "id" : post["author"]["memberId"], + "name" : post["author"]["profileName"], + "profile_type": post["author"]["profileType"], + } + if "artistOfficialProfile" in post["author"]: + artist_profile = post["author"]["artistOfficialProfile"] + author["name"] = artist_profile["officialName"] + data["author"] = author + + if "community" in post: + community = { + "id" : post["community"]["communityId"], + "name" : post["community"]["communityName"], + "artist_code": post["community"]["artistCode"], + } + data["community"] = community + + files = [] + if post["attachment"]: + self._extract_post(post, files) + elif post["extension"]: + if isinstance(self, WeverseMomentExtractor): + self._extract_moment(post, files) + else: + self._extract_media(post, files) + data["count"] = len(files) + + yield Message.Directory, data + for file in files: + file.update(data) + url = file.pop("url") + yield Message.Url, url, file + + def _extract_image(self, image): + url = image["url"] + return { + "id" : image["photoId"], + "url" : url, + "width" : image["width"], + "height" : image["height"], + "extension": text.ext_from_url(url), + } + + def _extract_video(self, video): + video_id = video["videoId"] + if isinstance(self, WeverseMediaExtractor): + master_id = (video.get("uploadInfo", {}).get( + "videoId") or video["infraVideoId"]) + best_video = self.get_best_video( + self.api.get_media_video_list(video_id, master_id)) + else: + best_video = self.get_best_video( + self.api.get_post_video_list(video_id)) + url = best_video["source"] + return { + "id" : video_id, + "url" : url, + "width" : best_video["encodingOption"]["width"], + "height" : best_video["encodingOption"]["height"], + "extension": text.ext_from_url(url), + } + + def _extract_embed(self, embed): + return { + "id" : embed["youtubeVideoId"], + "extension": None, + "url" : "ytdl:" + embed["videoPath"], + } + + def _extract_post(self, post, files): + attachments = {} + attachments.update(post["attachment"].get('photo', {})) + attachments.update(post["attachment"].get('video', {})) + + # the order of attachments in the api response can differ to the order + # of attachments on the site + attachment_order = list(text.extract_iter(post["body"], 'id="', '"')) + for index, id in enumerate(attachment_order, 1): + file = { + "num": index, + } + attachment = attachments[id] + if "photoId" in attachment: + file.update(self._extract_image(attachment)) + else: + file.update(self._extract_video(attachment)) + files.append(file) + + def _extract_moment(self, post, files): + moment = next(post["extension"][key] + for key in ("moment", "momentW1") + if key in post["extension"]) + if not moment: + return + + file = { + "num" : 1, + "expire_at": text.parse_timestamp(moment["expireAt"] / 1000), + } + if "photo" in moment: + file.update(self._extract_image(moment["photo"])) + else: + if not self.videos: + return + file.update(self._extract_video(moment["video"])) + + files.append(file) + + def _extract_media(self, post, files): + extension = post["extension"] + categories = [{ + "id" : category["id"], + "type" : category["type"], + "title": category["title"], + } for category in extension["mediaInfo"]["categories"]] + data = { + "title" : extension["mediaInfo"]["title"], + "media_type": extension["mediaInfo"]["mediaType"], + "categories": categories, + } + + if "image" in extension: + for index, photo in enumerate(extension["image"]["photos"], 1): + file = data.copy() + file["num"] = index + file.update(self._extract_image(photo)) + files.append(file) + elif "video" in extension: + if not self.videos: + return + file = data + file.update(self._extract_video(extension["video"])) + files.append(file) + else: + if not self.embeds or not self.videos: + return + file = data + file["num"] = 1 + file.update(self._extract_embed(extension["youtube"])) + files.append(file) + + def get_best_video(self, videos): + return max(videos, key=lambda video: + video["encodingOption"]["width"] * + video["encodingOption"]["height"]) + + def post(self): + return {} + + def login(self): + if self.cookies_check(self.cookies_names): + return + + username, password = self._get_auth_info() + if username: + self.cookies_update(_login_impl(self, username, password)) + + +class WeversePostExtractor(WeverseExtractor): + """Extractor for a weverse community post""" + subcategory = "post" + directory_fmt = ("{category}", "{community[name]}", + "{author[id]}", "{post_id}") + pattern = BASE_PATTERN + r"/(?:artist|fanpost)" + POST_ID_PATTERN + example = "https://weverse.io/abcdef/artist/1-123456789" + + def __init__(self, match): + WeverseExtractor.__init__(self, match) + self.post_id = match.group(2) + + def post(self): + return self.api.get_post(self.post_id) + + +class WeverseMemberExtractor(WeverseExtractor): + """Extractor for all posts from a weverse community member""" + subcategory = "member" + pattern = BASE_PATTERN + "/profile" + MEMBER_ID_PATTERN + r"$" + example = ("https://weverse.io/abcdef" + "/profile/a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5") + + def __init__(self, match): + WeverseExtractor.__init__(self, match) + self.member_id = match.group(2) + + def items(self): + self.login() + self.api = WeverseAPI(self) + + data = {"_extractor": WeversePostExtractor} + posts = self.api.get_member_posts(self.member_id) + for post in posts: + yield Message.Queue, post["shareUrl"], data + + +class WeverseFeedExtractor(WeverseExtractor): + """Extractor for a weverse community feed""" + subcategory = "feed" + pattern = BASE_PATTERN + r"/(feed|artist)$" + example = "https://weverse.io/abcdef/feed" + + def __init__(self, match): + WeverseExtractor.__init__(self, match) + self.feed_name = match.group(2) + + def items(self): + self.login() + self.api = WeverseAPI(self) + + data = {"_extractor": WeversePostExtractor} + posts = self.api.get_feed_posts(self.community_keyword, self.feed_name) + for post in posts: + yield Message.Queue, post["shareUrl"], data + + +class WeverseMomentExtractor(WeverseExtractor): + """Extractor for a weverse community artist moment""" + subcategory = "moment" + pattern = (BASE_PATTERN + "/moment" + MEMBER_ID_PATTERN + + "/post" + POST_ID_PATTERN) + example = ("https://weverse.io/abcdef" + "/moment/a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5" + "/post/1-123456789") + + def __init__(self, match): + WeverseExtractor.__init__(self, match) + self.post_id = match.group(3) + + def post(self): + return self.api.get_post(self.post_id) + + +class WeverseMomentsExtractor(WeverseExtractor): + """Extractor for all moments from a weverse community artist""" + subcategory = "moments" + pattern = BASE_PATTERN + "/moment" + MEMBER_ID_PATTERN + r"$" + example = ("https://weverse.io/abcdef" + "/moment/a0b1c2d3e4f5a6b7c8d9e0f1a2b3c4d5") + + def __init__(self, match): + WeverseExtractor.__init__(self, match) + self.member_id = match.group(2) + + def items(self): + self.login() + self.api = WeverseAPI(self) + + data = {"_extractor": WeverseMomentExtractor} + moments = self.api.get_member_moments(self.member_id) + for moment in moments: + yield Message.Queue, moment["shareUrl"], data + + +class WeverseMediaExtractor(WeverseExtractor): + """Extractor for a weverse community media post""" + subcategory = "media" + directory_fmt = ("{category}", "{community[name]}", + "media", "{post_id}") + pattern = BASE_PATTERN + "/media" + POST_ID_PATTERN + example = "https://weverse.io/abcdef/media/1-123456789" + + def __init__(self, match): + WeverseExtractor.__init__(self, match) + self.post_id = match.group(2) + + def post(self): + return self.api.get_post(self.post_id) + + +class WeverseMediaTabExtractor(WeverseExtractor): + """Extractor for the media tab of a weverse community""" + subcategory = "media-tab" + pattern = BASE_PATTERN + r"/media(?:/(all|membership|new))?$" + example = "https://weverse.io/abcdef/media" + + def __init__(self, match): + WeverseExtractor.__init__(self, match) + self.tab_name = match.group(2) or "all" + + def items(self): + self.login() + self.api = WeverseAPI(self) + + data = {"_extractor": WeverseMediaExtractor} + if self.tab_name == "new": + get_media = self.api.get_latest_community_media + elif self.tab_name == "membership": + get_media = self.api.get_membership_community_media + else: + get_media = self.api.get_all_community_media + medias = get_media(self.community_keyword) + for media in medias: + yield Message.Queue, media["shareUrl"], data + + +class WeverseMediaCategoryExtractor(WeverseExtractor): + """Extractor for media by category of a weverse community""" + subcategory = "media-category" + pattern = BASE_PATTERN + r"/media/category/(\d+)" + example = "https://weverse.io/abcdef/media/category/1234" + + def __init__(self, match): + WeverseExtractor.__init__(self, match) + self.media_category = match.group(2) + + def items(self): + self.login() + self.api = WeverseAPI(self) + + data = {"_extractor": WeverseMediaExtractor} + medias = self.api.get_media_by_category_id(self.media_category) + for media in medias: + yield Message.Queue, media["shareUrl"], data + + +class WeverseAPI(): + """Interface for the Weverse API""" + BASE_API_URL = "https://global.apis.naver.com" + WMD_API_URL = BASE_API_URL + "/weverse/wevweb" + VOD_API_URL = BASE_API_URL + "/rmcnmv/rmcnmv" + + APP_ID = "be4d79eb8fc7bd008ee82c8ec4ff6fd4" + SECRET = "1b9cb6378d959b45714bec49971ade22e6e24e42" + + def __init__(self, extractor): + self.extractor = extractor + + cookies = extractor.cookies + token_cookie_name = extractor.cookies_names[0] + cookies_domain = extractor.cookies_domain + self.access_token = cookies.get(token_cookie_name, + domain=cookies_domain) + self.headers = ({"Authorization": "Bearer " + self.access_token} + if self.access_token else None) + + def _endpoint_with_params(self, endpoint, params): + params_delimiter = "?" + if "?" in endpoint: + params_delimiter = "&" + return endpoint + params_delimiter + urllib.parse.urlencode( + query=params) + + def _message_digest(self, endpoint, params, timestamp): + key = self.SECRET.encode() + url = self._endpoint_with_params(endpoint, params) + message = "{}{}".format(url[:255], timestamp).encode() + hash = hmac.new(key, message, hashlib.sha1).digest() + return binascii.b2a_base64(hash).rstrip().decode() + + def _apply_no_auth(self, endpoint, params): + if not endpoint.endswith("/preview"): + endpoint += "/preview" + params.update({"fieldSet": "postForPreview"}) + return endpoint, params + + def _is_text_only(self, post): + for key in ("attachment", "extension"): + if post.get(key, {}): + return False + if "summary" in post: + s = post["summary"] + if s.get("videoCount", 0) + s.get("photoCount", 0) > 0: + return False + return True + + def get_in_key(self, video_id): + endpoint = "/video/v1.1/vod/{}/inKey".format(video_id) + return self._call_wmd(endpoint, method="POST")["inKey"] + + def get_community_id(self, community_keyword): + endpoint = "/community/v1.0/communityIdUrlPathByUrlPathArtistCode" + params = {"keyword": community_keyword} + return self._call_wmd(endpoint, params)["communityId"] + + def get_post(self, post_id): + endpoint = "/post/v1.0/post-{}".format(post_id) + params = {"fieldSet": "postV1"} + if not self.access_token: + endpoint, params = self._apply_no_auth(endpoint, params) + return self._call_wmd(endpoint, params) + + def get_media_video_list(self, video_id, master_id): + in_key = self.get_in_key(video_id) + url = self.VOD_API_URL + "/vod/play/v2.0/{}".format(master_id) + params = {"key": in_key} + res = self._call(url, params=params) + videos = res["videos"]["list"] + return videos + + def get_post_video_list(self, video_id): + endpoint = "/cvideo/v1.0/cvideo-{}/playInfo".format(video_id) + params = {"videoId": video_id} + res = self._call_wmd(endpoint, params=params) + videos = res["playInfo"]["videos"]["list"] + return videos + + def get_member_posts(self, member_id): + endpoint = "/post/v1.0/member-{}/posts".format(member_id) + params = { + "fieldSet" : "postsV1", + "filterType": "DEFAULT", + "limit" : 20, + "sortType" : "LATEST", + } + return self._pagination(endpoint, params) + + def get_feed_posts(self, community_keyword, feed_name): + community_id = self.get_community_id(community_keyword) + endpoint = "/post/v1.0/community-{}/{}TabPosts".format( + community_id, feed_name) + params = { + "fieldSet" : "postsV1", + "limit" : 20, + "pagingType": "CURSOR", + } + return self._pagination(endpoint, params) + + def get_latest_community_media(self, community_keyword): + community_id = self.get_community_id(community_keyword) + endpoint = "/media/v1.0/community-{}/more".format( + community_id) + params = { + "fieldSet" : "postsV1", + "filterType": "RECENT", + } + return self._pagination(endpoint, params) + + def get_membership_community_media(self, community_keyword): + community_id = self.get_community_id(community_keyword) + endpoint = "/media/v1.0/community-{}/more".format( + community_id) + params = { + "fieldSet" : "postsV1", + "filterType": "MEMBERSHIP", + } + return self._pagination(endpoint, params) + + def get_all_community_media(self, community_keyword): + community_id = self.get_community_id(community_keyword) + endpoint = "/media/v1.0/community-{}/searchAllMedia".format( + community_id) + params = { + "fieldSet" : "postsV1", + "sortOrder": "DESC", + } + return self._pagination(endpoint, params) + + def get_media_by_category_id(self, category_id): + endpoint = "/media/v1.0/category-{}/mediaPosts".format(category_id) + params = { + "fieldSet" : "postsV1", + "sortOrder": "DESC", + } + return self._pagination(endpoint, params) + + def get_member_moments(self, member_id): + endpoint = "/post/v1.0/member-{}/posts".format(member_id) + params = { + "fieldSet" : "postsV1", + "filterType": "MOMENT", + "limit" : 1, + } + return self._pagination(endpoint, params) + + def _call(self, url, **kwargs): + while True: + try: + return self.extractor.request(url, **kwargs).json() + except exception.HttpError as exc: + if exc.response.status_code == 401: + raise exception.AuthenticationError() + if exc.response.status_code == 403: + raise exception.AuthorizationError( + "Post requires membership") + if exc.response.status_code == 404: + raise exception.NotFoundError(self.extractor.subcategory) + self.extractor.log.debug(exc) + return + + def _call_wmd(self, endpoint, params=None, **kwargs): + if params is None: + params = {} + params.update({ + "appId" : self.APP_ID, + "language": "en", + "os" : "WEB", + "platform": "WEB", + "wpf" : "pc", + }) + # the param order is important for the message digest + params = OrderedDict(sorted(params.items())) + timestamp = int(time.time() * 1000) + message_digest = self._message_digest(endpoint, params, timestamp) + params.update({ + "wmsgpad": timestamp, + "wmd" : message_digest, + }) + return self._call(self.WMD_API_URL + endpoint, params=params, + headers=self.headers, **kwargs) + + def _pagination(self, endpoint, params=None): + if not self.access_token: + raise exception.AuthenticationError() + if params is None: + params = {} + while True: + res = self._call_wmd(endpoint, params) + for post in res["data"]: + if not self._is_text_only(post): + yield post + np = res.get("paging", {}).get("nextParams", {}) + if "after" not in np: + return + params["after"] = np["after"] + + +@cache(maxage=365*24*3600, keyarg=1) +def _login_impl(extr, username, password): + url = ("https://accountapi.weverse.io" + "/web/api/v2/auth/token/by-credentials") + data = {"email": username, "password": password} + headers = { + "x-acc-app-secret" : "5419526f1c624b38b10787e5c10b2a7a", + "x-acc-app-version": "3.0.0", + "x-acc-language" : "en", + "x-acc-service-id" : "weverse", + "x-acc-trace-id" : str(uuid.uuid4()), + } + extr.log.info("Logging in as %s", username) + res = extr.request( + url, method="POST", json=data, headers=headers).json() + if "accessToken" not in res: + extr.log.warning( + "Unable to log in as %s, proceeding without auth", username) + return {cookie.name: cookie.value for cookie in extr.cookies} diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 98a232345bb..49624268e7a 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -304,6 +304,13 @@ "home": "", "newvideo": "", }, + "weverse": { + "feed" : "Feed Tab, Artist Tab", + "media-category": "Media Categories", + "media-tab" : "Media Tabs", + "member" : "Member Profiles", + "moments" : "", + }, "wikiart": { "artists": "Artist Listings", }, @@ -386,6 +393,7 @@ "vipergirls" : "Supported", "wallhaven" : _APIKEY_WH, "weasyl" : _APIKEY_WY, + "weverse" : _COOKIES, "zerochan" : "Supported", } diff --git a/test/results/weverse.py b/test/results/weverse.py new file mode 100644 index 00000000000..e7d6c5aba85 --- /dev/null +++ b/test/results/weverse.py @@ -0,0 +1,216 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import weverse + +IMAGE_URL_PATTERN = r"https://phinf\.wevpstatic\.net/.+\.(?:gif|jpe?g|png|webp)$" +VIDEO_URL_PATTERN = r"https://weverse-rmcnmv\.akamaized\.net/.+\.(?:mp4|webm)(?:\?.+)?$" +COMBINED_URL_PATTERN = "(?i)" + IMAGE_URL_PATTERN + "|" + VIDEO_URL_PATTERN + +__tests__ = ( +{ + "#url" : "https://weverse.io/lesserafim/artist/4-147791342", + "#comment" : "post containing both a video and image", + "#category": ("", "weverse", "post"), + "#class" : weverse.WeversePostExtractor, + "#pattern" : COMBINED_URL_PATTERN, + "#count" : 2, + + "date" : "dt:2024-01-18 06:08:35", + "post_url" : "https://weverse.io/lesserafim/artist/4-147791342", + "post_id" : "4-147791342", + "post_type" : "NORMAL", + "section_type" : "ARTIST", + "hide_from_artist": False, + "membership_only" : False, + "author" : { + "id" : "b60d95bc3b71f4d97b28ac1b971cc641", + "name" : "KAZUHA", + "profile_type": "ARTIST", + }, + "community" : { + "id" : 47, + "name" : "LE SSERAFIM", + "artist_code": "LESSERAFIM", + }, +}, + +{ + "#url" : "https://weverse.io/lesserafim/artist/4-150863209", + "#comment" : "text only", + "#category": ("", "weverse", "post"), + "#class" : weverse.WeversePostExtractor, + "#count" : 0, +}, + +{ + "#url" : "https://weverse.io/dreamcatcher/artist/3-138146100", + "#comment" : "the order of the files displayed on the site and returned by the api do not always match" + "the id of the second file returned by the api is `2-274423384`" + "the id of the second file displayed on the site is `3-274413871`", + "#category": ("", "weverse", "post"), + "#class" : weverse.WeversePostExtractor, + "#pattern" : COMBINED_URL_PATTERN, + "#range" : "2", + "#count" : 1, + + "id" : "3-274413871", + "num": 2, +}, + +{ + "#url" : "https://weverse.io/dreamcatcher/fanpost/2-135105553", + "#comment" : "fan post", + "#category": ("", "weverse", "post"), + "#class" : weverse.WeversePostExtractor, + "#pattern" : COMBINED_URL_PATTERN, + "#count" : 1, + + "section_type": "FEED", + "author" : { + "profile_type": "FAN", + }, +}, + +{ + "#url" : "https://weverse.io/dreamcatcher/profile/e89820ec1a72d7255120284ca3aeafa5", + "#category": ("", "weverse", "member"), + "#class" : weverse.WeverseMemberExtractor, + "#pattern" : weverse.WeversePostExtractor.pattern, + "#auth" : True, +}, + +{ + "#url" : "https://weverse.io/dreamcatcher/feed", + "#comment" : "feed tab (fan posts)" + "each pagination call returns up to 20 items", + "#category": ("", "weverse", "feed"), + "#class" : weverse.WeverseFeedExtractor, + "#pattern" : weverse.WeversePostExtractor.pattern, + "#auth" : True, + "#range" : "21", +}, + +{ + "#url" : "https://weverse.io/dreamcatcher/artist", + "#comment" : "artist tab (artist posts)" + "each pagination call returns up to 20 items", + "#category": ("", "weverse", "feed"), + "#class" : weverse.WeverseFeedExtractor, + "#pattern" : weverse.WeversePostExtractor.pattern, + "#auth" : True, + "#range" : "21", +}, + +{ + "#url" : "https://weverse.io/dreamcatcher/moment/e89820ec1a72d7255120284ca3aeafa5/post/2-111675163", + "#comment" : "moment", + "#category": ("", "weverse", "moment"), + "#class" : weverse.WeverseMomentExtractor, + "#pattern" : COMBINED_URL_PATTERN, + "#count" : 1, + + "width" : 1080, + "height" : 1920, + "date" : "dt:2023-01-09 06:25:41", + "expire_at": "dt:2023-01-10 06:25:41", +}, + +{ + "#url" : "https://weverse.io/dreamcatcher/moment/785506b50e7890c3b81491f20728ee82/post/2-101327656", + "#comment" : "momentW1", + "#category": ("", "weverse", "moment"), + "#class" : weverse.WeverseMomentExtractor, + "#pattern" : COMBINED_URL_PATTERN, + "#count" : 1, + + "width" : 1128, + "height" : 1504, + "date" : "dt:2022-07-17 00:24:48", + "expire_at": "dt:2022-07-18 00:24:48", +}, + +{ + "#url" : "https://weverse.io/dreamcatcher/moment/e89820ec1a72d7255120284ca3aeafa5", + "#comment" : "each pagination call returns 1 item", + "#category": ("", "weverse", "moments"), + "#class" : weverse.WeverseMomentsExtractor, + "#pattern" : weverse.WeverseMomentExtractor.pattern, + "#auth" : True, + "#range" : "2", +}, + +{ + "#url" : "https://weverse.io/lesserafim/media/0-128617470", + "#comment" : "image", + "#category": ("", "weverse", "media"), + "#class" : weverse.WeverseMediaExtractor, + "#pattern" : COMBINED_URL_PATTERN, + "#count" : 5, + + "media_type": "IMAGE", + "categories": [ + { + "id" : 1091, + "type" : "MEDIA", + "title": "PHOTOBOOK", + }, + ], + "community" : { + "name": "LE SSERAFIM", + }, +}, + +{ + "#url" : "https://weverse.io/lesserafim/media/1-128435266", + "#comment" : "video", + "#category": ("", "weverse", "media"), + "#class" : weverse.WeverseMediaExtractor, + "#pattern" : COMBINED_URL_PATTERN, + "#count" : 1, + + "width" : 1080, + "height" : 1920, + "media_type": "VOD", + "categories": [ + { + "id" : 1532, + "type" : "MEDIA", + "title": "Perfect Night", + } + ], +}, + +{ + "#url" : "https://weverse.io/dreamcatcher/media/1-128875973", + "#comment" : "embed", + "#category": ("", "weverse", "media"), + "#class" : weverse.WeverseMediaExtractor, + + "post_type": "YOUTUBE", +}, + +{ + "#url" : "https://weverse.io/dreamcatcher/media", + "#comment" : "each pagination call returns up to 10 items", + "#category": ("", "weverse", "media-tab"), + "#class" : weverse.WeverseMediaTabExtractor, + "#pattern" : weverse.WeverseMediaExtractor.pattern, + "#auth" : True, + "#range" : "11", +}, + +{ + "#url" : "https://weverse.io/lesserafim/media/category/494", + "#comment" : "each pagination call returns up to 10 items", + "#category": ("", "weverse", "media-category"), + "#class" : weverse.WeverseMediaCategoryExtractor, + "#pattern" : weverse.WeverseMediaExtractor.pattern, + "#auth" : True, + "#range" : "11", +}, + +)