From 83fc9a3ab9b885899a296f7605b45b011850b434 Mon Sep 17 00:00:00 2001 From: Official-Husko <27901510+Official-Husko@users.noreply.github.com> Date: Fri, 22 Mar 2024 22:20:15 +0100 Subject: [PATCH 1/2] improvements and bug fixes fix: #19 fix: #20 refactor: add comments in furbooru.py --- README.md | 11 +++- main.py | 38 ++++++++----- modules/e6systems.py | 131 +++++++++++++++++-------------------------- modules/furbooru.py | 130 ++++++++++++++++++++++-------------------- modules/rule34.py | 60 +++++++++----------- run.bat | 1 + 6 files changed, 181 insertions(+), 190 deletions(-) create mode 100644 run.bat diff --git a/README.md b/README.md index c184c09..9c90128 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,15 @@ Welcome to the successor of the [multporn image downloader v1][2] & [v2][1] and ### Preview ![preview](https://github.com/Official-Husko/NN-Downloader/blob/dev/preview/preview.gif) -[Windows Download][14] | [Linux Download][21] (Thanks to [incognibro538](https://github.com/incognibro538)) +[Windows Download][14] | [Linux Download][21] (Thanks to @incognibro538) + +
+ +#### Features: +- Portable +- Proxy Support +- AI Training Compatible +- Avoid Duplicates
@@ -46,7 +54,6 @@ Welcome to the successor of the [multporn image downloader v1][2] & [v2][1] and Further sites can be added. Just open a [support ticket][11] with the URL to the site. -


diff --git a/main.py b/main.py index 79006b0..c48a841 100644 --- a/main.py +++ b/main.py @@ -6,7 +6,7 @@ import sys import inquirer -version = "1.6.1" +version = "1.6.2" if os.name == 'nt': from ctypes import windll @@ -15,7 +15,7 @@ proxy_list = [] header = {"User-Agent":f"nn-downloader/{version} (by Official Husko on GitHub)"} needed_folders = ["db", "media"] -database_list = ["e621", "furbooru", "rule34", "e6ai", "e926"] +database_list = ["e621", "e6ai", "e926", "furbooru", "rule34"] unsafe_chars = ["/", "\\", ":", "*", "?", "\"", "<", ">", "|", "\0", "$", "#", "@", "&", "%", "!", "`", "^", "(", ")", "{", "}", "[", "]", "=", "+", "~", ",", ";"] if sys.gettrace() is not None: @@ -107,7 +107,7 @@ def clear_screen(): site = answers.get("selection").lower() - if site in ["e621", "e6ai", "e926"]: + if site in ["e621", "e6ai", "e926", "furbooru", "rule34"]: print(colored("Please enter the tags you want to use.", "green")) user_tags = input(">> ").lower() @@ -121,33 +121,41 @@ def clear_screen(): max_sites = input(">> ").lower() print("") - apiUser = config["user_credentials"][site]["apiUser"] - apiKey = config["user_credentials"][site]["apiKey"] + if site in ["e621", "e6ai", "e926"]: + api_user = config.get("user_credentials",{}).get(site, {}).get("apiUser", "") + api_key = config.get("user_credentials", {}).get(site, {}).get("apiKey", "") if oneTimeDownload == True: with open(f"db/{site}.db", "r") as db_reader: database = db_reader.read().splitlines() - if apiKey == "" or apiUser == "": - print(colored("Please add your Api Key into the config.json", "red")) - sleep(5) else: - output = E6System.Fetcher(user_tags=user_tags, user_blacklist=config["blacklisted_tags"], proxy_list=proxy_list, max_sites=max_sites, user_proxies=config["proxies"], apiUser=apiUser, apiKey=apiKey, header=header, db=database, site=site, ai_training=ai_training) - + database = False + if api_key == "" or api_user == "": + print(colored("Please add your API Key into the config.json", "red")) + sleep(10) + sys.exit(0) + else: + output = E6System.fetcher(user_tags=user_tags, user_blacklist=config["blacklisted_tags"], proxy_list=proxy_list, max_sites=max_sites, user_proxies=config["proxies"], api_user=api_user, api_key=api_key, header=header, db=database, site=site, ai_training=ai_training) + elif site == "rule34": if oneTimeDownload == True: with open("db/rule34.db", "r") as db_reader: database = db_reader.read().splitlines() - output = RULE34.Fetcher(user_tags=user_tags, user_blacklist=config["blacklisted_tags"], proxy_list=proxy_list, max_sites=max_sites, user_proxies=config["proxies"], header=header, db=database) + else: + database = False + output = RULE34.fetcher(user_tags=user_tags, user_blacklist=config["blacklisted_tags"], proxy_list=proxy_list, max_sites=max_sites, user_proxies=config["proxies"], header=header, db=database) elif site == "furbooru": - apiKey = config["user_credentials"]["furbooru"]["apiKey"] + api_key = config.get("user_credentials", {}).get(site, {}).get("apiKey", "") if oneTimeDownload == True: with open("db/furbooru.db", "r") as db_reader: database = db_reader.read().splitlines() - if apiKey == "": - print(colored("Please add your Api Key into the config.json", "red")) + else: + database = False + if api_key == "": + print(colored("Please add your API Key into the config.json", "red")) sleep(5) else: - output = FURBOORU.Fetcher(user_tags=user_tags, user_blacklist=config["blacklisted_tags"], proxy_list=proxy_list, max_sites=max_sites, user_proxies=config["proxies"], apiKey=apiKey, header=header, db=database) + output = FURBOORU.fetcher(user_tags=user_tags, user_blacklist=config["blacklisted_tags"], proxy_list=proxy_list, max_sites=max_sites, user_proxies=config["proxies"], api_key=api_key, header=header, db=database) elif site == "multporn": print(colored("Please enter the link. (e.g. https://multporn.net/comics/double_trouble_18)", "green")) diff --git a/modules/e6systems.py b/modules/e6systems.py index 7cc2e5f..8990f3c 100644 --- a/modules/e6systems.py +++ b/modules/e6systems.py @@ -1,119 +1,94 @@ -from requests.auth import HTTPBasicAuth -import requests +import os +import json import random +import requests +from requests.auth import HTTPBasicAuth from termcolor import colored from alive_progress import alive_bar from time import sleep from datetime import datetime -import os -import json from main import unsafe_chars -now = datetime.now() -dt_now = now.strftime("%d-%m-%Y_%H-%M-%S") -class E6System(): - def Fetcher(user_tags, user_blacklist, proxy_list, max_sites, user_proxies, apiUser ,apiKey, header, db, site, ai_training): +class E6System: + @staticmethod + def fetcher(user_tags, user_blacklist, proxy_list, max_sites, user_proxies, api_user, api_key, header, db, site, ai_training): try: approved_list = [] + now = datetime.now() + dt_now = now.strftime("%d-%m-%Y_%H-%M-%S") page = 1 + while True: URL = f"https://{site}.net/posts.json?tags={user_tags}&limit=320&page={page}" - if user_proxies == True: - proxy = random.choice(proxy_list) - raw_req = requests.get(URL, headers=header, proxies=proxy, auth=HTTPBasicAuth(apiUser, apiKey)) - else: - raw_req = requests.get(URL, headers=header, auth=HTTPBasicAuth(apiUser, apiKey)) - + proxy = random.choice(proxy_list) if user_proxies else None + raw_req = requests.get(URL, headers=header, proxies=proxy, auth=HTTPBasicAuth(api_user, api_key)) req = raw_req.json() - - try: - if req["message"] == "You cannot go beyond page 750. Please narrow your search terms.": - print(colored(req["message"] + " (API limit)", "red")) - sleep(5) - break - except: - pass - if req["posts"] == []: + if "message" in req and req["message"] == "You cannot go beyond page 750. Please narrow your search terms.": + print(colored(req["message"] + " (API limit)", "red")) + sleep(5) + break + + if not req["posts"]: print(colored("No images found or all downloaded! Try different tags.", "yellow")) sleep(5) break - + elif page == max_sites: print(colored(f"Finished Downloading {max_sites} of {max_sites} pages.", "yellow")) sleep(5) break - else: + else: for item in req["posts"]: image_id = item["id"] - image_address = item["file"]["url"] - post_tags1 = item["tags"]["general"] - post_tags2 = item["tags"]["species"] - post_tags3 = item["tags"]["character"] - if site == "e6ai": - post_tags4 = item["tags"]["director"] - post_tags5 = item["tags"]["meta"] - else: - post_tags4 = item["tags"]["copyright"] - post_tags5 = item["tags"]["artist"] - - if ai_training == True: - meta_tags = item["tags"] - else: - meta_tags = [] + image_address = item["file"].get("url") + meta_tags = item["tags"] if ai_training else [] + post_tags = [item["tags"][tag_type] for tag_type in ["general", "species", "character"]] + post_tags += [item["tags"]["director"], item["tags"]["meta"]] if site == "e6ai" else [item["tags"]["copyright"], item["tags"]["artist"]] + post_tags = sum(post_tags, []) + user_blacklist_length = len(user_blacklist) + + passed = sum(blacklisted_tag in post_tags for blacklisted_tag in user_blacklist) - post_tags = post_tags1 + post_tags2 + post_tags3 + post_tags4 + post_tags5 - image_format = item["file"]["ext"] - user_blacklist_lenght = len(user_blacklist) - passed = 0 + if passed == 0 and not db and image_address and not any(tag in user_blacklist for tag in post_tags): + image_data = {"image_address": image_address, "image_format": item["file"]["ext"], "image_id": image_id, "meta_tags": meta_tags} + approved_list.append(image_data) - for blacklisted_tag in user_blacklist: - if blacklisted_tag in post_tags: - break - else: - passed += 1 - if passed == user_blacklist_lenght and str(image_id) not in db and image_address != None: - image_data = {"image_address": image_address, "image_format": image_format, "image_id": image_id, "meta_tags": meta_tags} + elif db and str(image_id) not in db and image_address and not any(tag in user_blacklist for tag in post_tags): + image_data = {"image_address": image_address, "image_format": item["file"]["ext"], "image_id": image_id, "meta_tags": meta_tags} approved_list.append(image_data) - else: - pass - # Download Each file with alive_bar(len(approved_list), calibrate=1, dual_line=True, title='Downloading') as bar: for data in approved_list: - image_address = data["image_address"] - image_format = data["image_format"] - image_id = data["image_id"] - meta_tags = data["meta_tags"] + image_address = data.get("image_address") + image_format = data.get("image_format") + image_id = data.get("image_id") + meta_tags = data.get("meta_tags") bar.text = f'-> Downloading: {image_id}, please wait...' - if user_proxies == True: - proxy = random.choice(proxy_list) - img_data = requests.get(image_address, proxies=proxy).content - else: - sleep(1) - img_data = requests.get(image_address).content - - safe_user_tags = user_tags.replace(" ", "_") - for char in unsafe_chars: - safe_user_tags = safe_user_tags.replace(char, "") - if not os.path.exists(f"media/{dt_now}_{safe_user_tags}"): - os.mkdir(f"media/{dt_now}_{safe_user_tags}") + proxy = random.choice(proxy_list) if user_proxies else None + img_data = requests.get(image_address, proxies=proxy).content if user_proxies else requests.get(image_address).content - if not os.path.exists(f"media/{dt_now}_{safe_user_tags}/meta") and ai_training == True: - os.mkdir(f"media/{dt_now}_{safe_user_tags}/meta") + safe_user_tags = "".join(char for char in user_tags if char not in unsafe_chars).replace(" ", "_") + directory = f"media/{dt_now}_{safe_user_tags}" + meta_directory = f"{directory}/meta" - with open(f"media/{dt_now}_{safe_user_tags}/{str(image_id)}.{image_format}", 'wb') as handler: - handler.write(img_data) + os.makedirs(directory, exist_ok=True) if ai_training == True: - with open(f"media/{dt_now}_{safe_user_tags}/meta/{str(image_id)}.json", 'w') as handler: + os.makedirs(meta_directory, exist_ok=True) + with open(f"{meta_directory}/{str(image_id)}.json", 'w') as handler: json.dump(meta_tags, handler, indent=6) - with open(f"db/{site}.db", "a") as db_writer: - db_writer.write(f"{str(image_id)}\n") + with open(f"{directory}/{str(image_id)}.{image_format}", 'wb') as handler: + handler.write(img_data) + + if db != False: + with open(f"db/{site}.db", "a") as db_writer: + db_writer.write(f"{str(image_id)}\n") + bar() print(colored(f"Page {page} Completed", "green")) @@ -124,4 +99,4 @@ def Fetcher(user_tags, user_blacklist, proxy_list, max_sites, user_proxies, apiU return {"status": "ok"} except Exception as e: - return {"status": "error", "uinput": user_tags, "exception": str(e), "extra": raw_req.content} \ No newline at end of file + return {"status": "error", "uinput": user_tags, "exception": str(e), "extra": raw_req.content} diff --git a/modules/furbooru.py b/modules/furbooru.py index 6af982a..0b9dc2b 100644 --- a/modules/furbooru.py +++ b/modules/furbooru.py @@ -1,91 +1,97 @@ -import requests -import random -from termcolor import colored -from alive_progress import alive_bar -from time import sleep -from datetime import datetime -import os +import requests # Importing requests library for making HTTP requests +import random # Importing random library for random selection +from termcolor import colored # Importing colored function from termcolor for colored output +from alive_progress import alive_bar # Importing alive_bar from alive_progress for progress bar +from time import sleep # Importing sleep function from time for delaying execution +from datetime import datetime # Importing datetime class from datetime module for date and time operations +import os # Importing os module for operating system related functionalities -from main import unsafe_chars -now = datetime.now() -dt_now = now.strftime("%d-%m-%Y_%H-%M-%S") +from main import unsafe_chars # Importing unsafe_chars from main module + +now = datetime.now() # Getting current date and time +dt_now = now.strftime("%d-%m-%Y_%H-%M-%S") # Formatting current date and time class FURBOORU(): - def Fetcher(user_tags, user_blacklist, proxy_list, max_sites, user_proxies, apiKey, header, db): + @staticmethod + def fetcher(user_tags, user_blacklist, proxy_list, max_sites, user_proxies, api_key, header, db): + """ + Fetches images from Furbooru API based on user-defined tags and parameters. + + Args: + user_tags (str): User-defined tags for image search. + user_blacklist (list): List of tags to blacklist. + proxy_list (list): List of proxies to use for requests. + max_sites (int): Maximum number of pages to fetch images from. + user_proxies (bool): Flag indicating whether to use proxies for requests. + api_key (str): API key for accessing the Furbooru API. + header (dict): HTTP header for requests. + db (bool or set): Database of downloaded images. + + Returns: + dict: Dictionary containing status of the operation. + """ try: - user_tags = user_tags.replace(" ", ", ") - approved_list = [] - page = 1 + user_tags = user_tags.replace(" ", ", ") # Replace spaces in user_tags with commas + approved_list = [] # List to store approved images + page = 1 # Starting page number + while True: - URL = f"https://furbooru.org/api/v1/json/search/images?q={user_tags}&page={page}&key={apiKey}&per_page=50" - if user_proxies == True: - proxy = random.choice(proxy_list) - raw_req = requests.get(URL, headers=header, proxies=proxy) - else: - raw_req = requests.get(URL, headers=header) - - req = raw_req.json() + URL = f"https://furbooru.org/api/v1/json/search/images?q={user_tags}&page={page}&key={api_key}&per_page=50" + # Constructing URL for API request + proxy = random.choice(proxy_list) if user_proxies else None # Selecting random proxy if user_proxies is True + raw_req = requests.get(URL, headers=header, proxies=proxy) # Making HTTP GET request + req = raw_req.json() # Parsing JSON response if req["total"] == 0: - print(colored("No images found or all downloaded! Try different tags.", "yellow")) - sleep(5) + print(colored("No images found or all downloaded! Try different tags.", "yellow")) # Display message if no images found + sleep(5) # Wait for 5 seconds break elif page == max_sites: - print(colored(f"Finished Downloading {max_sites} of {max_sites} pages.", "yellow")) - sleep(5) + print(colored(f"Finished Downloading {max_sites} of {max_sites} pages.", "yellow")) # Display message when maximum pages reached + sleep(5) # Wait for 5 seconds break - else: + else: for item in req["images"]: - image_hidden = item["hidden_from_users"] - if image_hidden != False: - pass - else: + if not item["hidden_from_users"]: post_tags = item["tags"] + if any(tag in user_blacklist for tag in post_tags): + continue # Skip image if any blacklisted tag is found + image_address = item["representations"]["full"] image_format = item["format"] image_id = item["id"] - user_blacklist_lenght = len(user_blacklist) - passed = 0 - - for blacklisted_tag in user_blacklist: - if blacklisted_tag in post_tags: - break - else: - passed += 1 - if passed == user_blacklist_lenght and str(image_id) not in db: + + if db is False or str(image_id) not in db: image_data = {"image_address": image_address, "image_format": image_format, "image_id": image_id} approved_list.append(image_data) - else: - pass + with alive_bar(len(approved_list), calibrate=1, dual_line=True, title='Downloading') as bar: for data in approved_list: image_address = data["image_address"] image_format = data["image_format"] image_id = data["image_id"] bar.text = f'-> Downloading: {image_id}, please wait...' - if user_proxies == True: - proxy = random.choice(proxy_list) - img_data = requests.get(image_address, proxies=proxy).content - else: - sleep(1) - img_data = requests.get(image_address).content + + proxy = random.choice(proxy_list) if user_proxies else None + img_data = requests.get(image_address, proxies=proxy).content if user_proxies else requests.get(image_address).content - safe_user_tags = user_tags.replace(" ", "_") - for char in unsafe_chars: - safe_user_tags = safe_user_tags.replace(char, "") + safe_user_tags = "".join(char for char in user_tags if char not in unsafe_chars).replace(" ", "_") + directory = f"media/{dt_now}_{safe_user_tags}" + os.makedirs(directory, exist_ok=True) - if not os.path.exists(f"media/{dt_now}_{safe_user_tags}"): - os.mkdir(f"media/{dt_now}_{safe_user_tags}") - with open(f"media/{dt_now}_{safe_user_tags}/{str(image_id)}.{image_format}", 'wb') as handler: + with open(f"{directory}/{str(image_id)}.{image_format}", 'wb') as handler: handler.write(img_data) - with open("db/furbooru.db", "a") as db_writer: - db_writer.write(f"{str(image_id)}\n") + + if db != False: + with open("db/furbooru.db", "a") as db_writer: + db_writer.write(f"{str(image_id)}\n") + bar() - print(colored(f"Page {page} Completed", "green")) - approved_list.clear() - page += 1 + print(colored(f"Page {page} Completed", "green")) # Display completion message for current page + approved_list.clear() # Clear approved_list for next page + page += 1 # Move to next page + + return {"status": "ok"} # Return success status - return {"status": "ok"} - except Exception as e: - return {"status": "error", "uinput": user_tags, "exception": str(e), "extra": raw_req.content} \ No newline at end of file + return {"status": "error", "uinput": user_tags, "exception": str(e), "extra": raw_req.content} # Return error status along with details diff --git a/modules/rule34.py b/modules/rule34.py index d1ddc6e..a49c772 100644 --- a/modules/rule34.py +++ b/modules/rule34.py @@ -7,53 +7,45 @@ import os from main import unsafe_chars + now = datetime.now() dt_now = now.strftime("%d-%m-%Y_%H-%M-%S") class RULE34(): - def Fetcher(user_tags, user_blacklist, proxy_list, max_sites, user_proxies, header, db): + @staticmethod + def fetcher(user_tags, user_blacklist, proxy_list, max_sites, user_proxies, header, db): try: approved_list = [] page = 1 + while True: URL = f"https://api.rule34.xxx/index.php?page=dapi&s=post&q=index&pid={page}&limit=1000&json=1&tags={user_tags}" - if user_proxies == True: - proxy = random.choice(proxy_list) - raw_req = requests.get(URL, headers=header, proxies=proxy) - else: - raw_req = requests.get(URL, headers=header) - + proxy = random.choice(proxy_list) if user_proxies else None + raw_req = requests.get(URL, headers=header, proxies=proxy) req = raw_req.json() - if req == []: + if not req: print(colored("No images found or all downloaded! Try different tags.", "yellow")) sleep(5) break - elif page == max_sites: print(colored(f"Finished Downloading {max_sites} of {max_sites} pages.", "yellow")) sleep(5) break - else: for item in req: post_tags = str.split(item["tags"]) + if any(tag in user_blacklist for tag in post_tags): + continue # Skip image if any blacklisted tag is found + image_address = item["file_url"] image_name = item["image"] image_id = item["id"] - user_blacklist_lenght = len(user_blacklist) - passed = 0 - for blacklisted_tag in user_blacklist: - if blacklisted_tag in post_tags: - break - else: - passed += 1 - if passed == user_blacklist_lenght and str(image_id) not in db: + if db is False or str(image_id) not in db: image_data = {"image_address": image_address, "image_name": image_name, "image_id": image_id} approved_list.append(image_data) - else: - pass + with alive_bar(len(approved_list), calibrate=1, dual_line=True, title='Downloading') as bar: for data in approved_list: image_address = data["image_address"] @@ -61,24 +53,26 @@ def Fetcher(user_tags, user_blacklist, proxy_list, max_sites, user_proxies, head image_id = data["image_id"] image_format = image_address.rpartition(".") bar.text = f'-> Downloading: {image_id}, please wait...' - if user_proxies == True: - proxy = random.choice(proxy_list) - img_data = requests.get(image_address, proxies=proxy).content - else: - sleep(1) - img_data = requests.get(image_address).content - + + proxy = random.choice(proxy_list) if user_proxies else None + img_data = requests.get(image_address, proxies=proxy).content if user_proxies else requests.get(image_address).content + safe_user_tags = user_tags.replace(" ", "_") for char in unsafe_chars: safe_user_tags = safe_user_tags.replace(char, "") - if not os.path.exists(f"media/{dt_now}_{safe_user_tags}"): - os.mkdir(f"media/{dt_now}_{safe_user_tags}") - with open(f"media/{dt_now}_{safe_user_tags}/{str(image_id)}.{image_format[-1]}", 'wb') as handler: + directory = f"media/{dt_now}_{safe_user_tags}" + os.makedirs(directory, exist_ok=True) + + with open(f"{directory}/{str(image_id)}.{image_format[-1]}", 'wb') as handler: handler.write(img_data) - with open("db/rule34.db", "a") as db_writer: - db_writer.write(f"{str(image_id)}\n") + + if db != False: + with open("db/rule34.db", "a") as db_writer: + db_writer.write(f"{str(image_id)}\n") + bar() + print(colored(f"Page {page} Completed", "green")) approved_list.clear() page += 1 @@ -86,4 +80,4 @@ def Fetcher(user_tags, user_blacklist, proxy_list, max_sites, user_proxies, head return {"status": "ok"} except Exception as e: - return {"status": "error", "uinput": user_tags, "exception": str(e), "extra": raw_req.content} \ No newline at end of file + return {"status": "error", "uinput": user_tags, "exception": str(e), "extra": raw_req.content} diff --git a/run.bat b/run.bat new file mode 100644 index 0000000..2a11aca --- /dev/null +++ b/run.bat @@ -0,0 +1 @@ +conda activate ".\.env" && python main.py \ No newline at end of file From 7e69fda84a29bddb3ece0059cd32a74c7e33df40 Mon Sep 17 00:00:00 2001 From: Official-Husko <27901510+Official-Husko@users.noreply.github.com> Date: Fri, 22 Mar 2024 22:23:14 +0100 Subject: [PATCH 2/2] fix name link in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9c90128..74a74a6 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Welcome to the successor of the [multporn image downloader v1][2] & [v2][1] and ### Preview ![preview](https://github.com/Official-Husko/NN-Downloader/blob/dev/preview/preview.gif) -[Windows Download][14] | [Linux Download][21] (Thanks to @incognibro538) +[Windows Download][14] | [Linux Download][21] (Thanks to [incognibro538](https://github.com/incognibro538))