diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 8f54b15765..003dcaa979 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -889,6 +889,12 @@ Consider all sites to be NSFW unless otherwise known. Files + + Urlgalleries + https://urlgalleries.net/ + Galleries + + Vipergirls https://vipergirls.to/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 72239d5cbe..d074de22eb 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -155,6 +155,7 @@ "tumblrgallery", "twibooru", "twitter", + "urlgalleries", "unsplash", "uploadir", "urlshortener", diff --git a/gallery_dl/extractor/urlgalleries.py b/gallery_dl/extractor/urlgalleries.py new file mode 100644 index 0000000000..b21709a967 --- /dev/null +++ b/gallery_dl/extractor/urlgalleries.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://urlgalleries.net/""" + +from .common import GalleryExtractor, Message +from .. 
class UrlgalleriesGalleryExtractor(GalleryExtractor):
    """Extractor for galleries hosted on urlgalleries.net blogs"""
    category = "urlgalleries"
    root = "urlgalleries.net"
    request_interval = (0.5, 1.0)
    pattern = r"(?:https?://)(?:(\w+)\.)?urlgalleries\.net/(?:[\w-]+-)?(\d+)"
    example = "https://blog.urlgalleries.net/gallery-12345/TITLE"

    def __init__(self, match):
        """Store blog name and gallery ID and build the gallery page URL

        'match' is the regex match of 'pattern'; group 1 is the optional
        blog subdomain (None when absent), group 2 the numeric gallery ID.
        """
        self.blog, self.gallery_id = match.groups()
        # NOTE(review): '?a=10000' presumably requests every image on a
        # single page -- confirm against the site
        url = "{}/porn-gallery-{}/?a=10000".format(
            self._blog_root(), self.gallery_id)
        GalleryExtractor.__init__(self, match, url)

    def _blog_root(self):
        """Return the scheme+host for this gallery's blog

        The subdomain is optional in 'pattern', so 'self.blog' may be None;
        formatting it directly would yield the bogus host
        'None.urlgalleries.net'. Fall back to the bare domain instead.
        """
        if self.blog:
            return "https://{}.urlgalleries.net".format(self.blog)
        return "https://urlgalleries.net"

    def items(self):
        """Yield one Directory message, then one Queue message per image

        Each image link found on the gallery page is requested with HEAD
        and redirects disabled; the value of the 'Location' response header
        is queued together with the gallery metadata.
        """
        page = self.request(self.gallery_url).text
        imgs = self.images(page)
        data = self.metadata(page)
        data["count"] = len(imgs)
        del page  # page source is no longer needed

        root = self._blog_root()
        yield Message.Directory, data
        # 'num' is written into the shared metadata dict on every iteration
        for data["num"], img in enumerate(imgs, 1):
            response = self.request(
                root + img, method="HEAD", allow_redirects=False)
            yield Message.Queue, response.headers["Location"], data

    def metadata(self, page):
        """Return a dict of gallery metadata extracted from 'page'

        The extractor returned by text.extract_from() is called once per
        dict entry in order, so the '_site' and '_rprt' entries exist only
        to consume the ' title="' attributes that precede the wanted ones.
        """
        extr = text.extract_from(page)
        return {
            "gallery_id": self.gallery_id,
            "_site": extr(' title="', '"'),  # site name
            "blog" : text.unescape(extr(' title="', '"')),
            "_rprt": extr(' title="', '"'),  # report button
            "title": text.unescape(extr(' title="', '"').strip()),
            "date" : text.parse_datetime(
                extr(" images in gallery | ", "<"), "%B %d, %Y %H:%M"),
        }

    def images(self, page):
        """Return a list of the href values found after 'id="wtf"'"""
        # An empty end delimiter is found immediately at the start offset,
        # which makes the extracted section always empty; delimit it by a
        # closing tag instead.
        # NOTE(review): assumes the image container is a <div> -- confirm
        imgs = text.extr(page, 'id="wtf"', "</div>")
        return list(text.extract_iter(imgs, " href='", "'"))
from gallery_dl.extractor import urlgalleries


# Expected results for UrlgalleriesGalleryExtractor.
# Keys starting with "#" are presumably directives for the test runner;
# the remaining keys are expected metadata values.
__tests__ = (
    # gallery URL including the 'porn-gallery-' prefix and a title slug
    {
        "#url": "https://photos2q.urlgalleries.net/porn-gallery-7851311/clarice-window-8",
        "#category": ("", "urlgalleries", "gallery"),
        "#class": urlgalleries.UrlgalleriesGalleryExtractor,
        "#range": "1-3",
        "#urls": (
            "https://fappic.com/x207mqkn2463/4gq1yv.jpg",
            "https://fappic.com/q684ua2rp0j9/4gq1xv.jpg",
            "https://fappic.com/8vf3n8fgz9po/4gq1ya.jpg",
        ),

        "blog": "photos2q",
        "count": 39,
        "date": "dt:2023-12-08 13:59:00",
        "gallery_id": "7851311",
        "num": range(1, 3),
        "title": "Clarice window 8",
    },

    # short URL form consisting of the numeric gallery ID only
    {
        "#url": "https://dreamer.urlgalleries.net/7645840",
        "#category": ("", "urlgalleries", "gallery"),
        "#class": urlgalleries.UrlgalleriesGalleryExtractor,
        "#range": "1-3",
        "#urls": (
            "https://www.fappic.com/vj7up04ny487/AmourAngels-0001.jpg",
            "https://www.fappic.com/zfgsmpm36iyv/AmourAngels-0002.jpg",
            "https://www.fappic.com/rqpt37rdbwa5/AmourAngels-0003.jpg",
        ),

        "blog": "Dreamer",
        "count": 105,
        "date": "dt:2020-03-10 21:17:00",
        "gallery_id": "7645840",
        "num": range(1, 3),
        "title": "Angelika - Rustic Charm - AmourAngels 2016-09-27",
    },
)