Refactor source loading and update checking #999

dipu-bd committed Aug 9, 2021
1 parent bb1f8d7 commit f107fe5
Showing 20 changed files with 468 additions and 471 deletions.
9 changes: 9 additions & 0 deletions .editorconfig
@@ -0,0 +1,9 @@
+# EditorConfig is awesome: http://EditorConfig.org
+
+# top-most EditorConfig file
+root = true
+
+# Unix-style newlines with a newline ending every file
+[*]
+end_of_line = lf
+insert_final_newline = true
1 change: 1 addition & 0 deletions .gitattributes
@@ -0,0 +1 @@
+* text=auto eol=lf
122 changes: 60 additions & 62 deletions README.md

Large diffs are not rendered by default.

9 changes: 1 addition & 8 deletions lncrawl/bots/console/start.py
@@ -1,13 +1,12 @@
 # -*- coding: utf-8 -*-
-import os
 from urllib.parse import urlparse
 
 from questionary import prompt
 
 from ...core import display
 from ...core.app import App
 from ...core.arguments import get_args
-from ...sources import add_all_crawlers, rejected_sources
+from ...core.sources import rejected_sources
 from .open_folder_prompt import display_open_folder
 from .resume_download import resume_session
@@ -18,12 +17,6 @@ def start(self):
         raise Exception('Unknown self: ' + type(self))
 
     args = get_args()
-    for crawler_file in args.crawler:
-        if os.path.isfile(crawler_file):
-            add_all_crawlers(crawler_file)
-        # end if
-    # end if
-
     if args.list_sources:
         display.url_supported_list()
         return
3 changes: 1 addition & 2 deletions lncrawl/bots/telegram.py
@@ -10,7 +10,6 @@
                           MessageHandler, Updater)
 
 from ..core.app import App
-from ..sources import crawler_list
 from ..utils.uploader import upload
 
 logger = logging.getLogger(__name__)
@@ -28,7 +27,7 @@
 class TelegramBot:
     def start(self):
         os.environ['debug_mode'] = 'yes'
-
+        # Create the EventHandler and pass it your bot's token.
         self.updater = Updater(
             os.getenv('TELEGRAM_TOKEN', ''),
13 changes: 4 additions & 9 deletions lncrawl/bots/test/__init__.py
@@ -2,27 +2,22 @@
 """
 The purpose of this bot is to test the application and crawlers
 """
-import io
 import sys
 import traceback
 from random import random
 
+from cloudscraper.exceptions import CaptchaException, CloudflareException
 from requests import RequestException
 from urllib3.exceptions import HTTPError
-from cloudscraper.exceptions import CloudflareException, CaptchaException
 
 from ...assets.icons import isWindows
-from ...sources import crawler_list
+from ...core.sources import crawler_list
 
 
 class TestBot:
     allerrors = dict()
 
-    from .test_inputs import test_user_inputs
-    from .test_inputs import allowed_failures
-
-    from .test_crawler import test_crawler
     from .post_github import post_on_github
+    from .test_crawler import test_crawler
+    from .test_inputs import allowed_failures, test_user_inputs
 
     def start(self):
         try:
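A note on the class body above: TestBot assembles its methods by importing plain functions (post_on_github, test_crawler, and the test_inputs names) directly inside the class statement. A minimal sketch of that pattern, split across two hypothetical files (neither is part of lncrawl):

# helpers.py (hypothetical)
def greet(self):
    return 'Hello from ' + type(self).__name__

# bot.py (hypothetical)
class Bot:
    # A function imported in a class body becomes an ordinary class
    # attribute; since functions are descriptors, Bot().greet binds
    # self on lookup exactly like a def written in the body.
    from helpers import greet

print(Bot().greet())  # -> 'Hello from Bot'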
5 changes: 3 additions & 2 deletions lncrawl/core/__init__.py
@@ -5,15 +5,16 @@
 import logging
 import os
 import sys
 
 import colorama
 from colorama import Fore
 
 from ..assets.version import get_value as get_version
 from ..bots import run_bot
-from ..utils.update_checker import check_updates
 from .arguments import get_args
 from .display import (cancel_method, debug_mode, description, epilog,
                       error_message, input_suppression)
+from .sources import load_sources
 
 logger = logging.getLogger(__name__)

@@ -65,7 +66,7 @@ def init():
 def start_app():
     init()
 
-    check_updates()
+    load_sources()
     cancel_method()
 
     try:
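Per the commit title, the startup call swaps a standalone check_updates() for load_sources(), which now owns both source loading and the update check. The new lncrawl/core/sources.py itself is among the files not rendered on this page, so the following is only a rough sketch of the shape such a loader could take; every name below is an assumption, not taken from the commit:

from typing import Dict, Iterable, List

crawler_list: Dict[str, type] = {}     # base url -> crawler class
rejected_sources: Dict[str, str] = {}  # base url -> rejection reason

class ExampleCrawler:
    # Stand-in for a real crawler module's class.
    base_url: List[str] = ['https://example.com/']

def discover_crawler_classes() -> Iterable[type]:
    # A real loader would walk the sources package and import each
    # crawler module; here we just yield the stand-in.
    return [ExampleCrawler]

def load_sources() -> None:
    # 1. refresh the source index (absorbing the old check_updates)
    # 2. register every crawler class under each base URL it serves
    for klass in discover_crawler_classes():
        for url in klass.base_url:
            crawler_list[url] = klass

load_sources()
assert 'https://example.com/' in crawler_list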
37 changes: 19 additions & 18 deletions lncrawl/core/app.py
@@ -5,13 +5,13 @@
 from typing import Any, Dict, List, Optional, Tuple
 from urllib.parse import urlparse
 
-from lncrawl.core.crawler import Crawler
 from slugify import slugify
 
 from .. import constants as C
 from ..binders import available_formats, generate_books
-from ..sources import crawler_list
-from .downloader import download_chapters, download_chapter_images
+from lncrawl.core.crawler import Crawler
+from ..core.sources import crawler_list, rejected_sources
+from .downloader import download_chapter_images, download_chapters
 from .novel_info import format_novel, save_metadata
 from .novel_search import search_novels
@@ -93,24 +93,25 @@ def search_novel(self):
     # ----------------------------------------------------------------------- #
 
     def init_crawler(self, novel_url):
-        '''Requires: [user_input]'''
-        '''Produces: crawler'''
         if not novel_url:
             return
         # end if
-        hostname = urlparse(novel_url).hostname
-        for home_url, create_crawler in crawler_list.items():
-            if hostname == urlparse(home_url).hostname:
-                logger.info('Initializing crawler for: %s', home_url)
-                self.crawler: Crawler = create_crawler()
-                self.crawler.novel_url = novel_url
-                self.crawler.home_url = home_url.strip('/')
-                break
-            # end if
-        # end for
-        if not self.crawler:
-            raise Exception('No crawlers were found')
 
+        parsed_url = urlparse(novel_url)
+        base_url = '%s://%s/' % (parsed_url.scheme, parsed_url.hostname)
+        if base_url in rejected_sources:
+            raise Exception('Source is rejected')
+        # end if
+
+        CrawlerType = crawler_list.get(base_url)
+        if not CrawlerType:
+            raise Exception('No crawler found')
+        # end if
+
+        logger.info('Initializing crawler for: %s', base_url)
+        self.crawler = CrawlerType()
+        self.crawler.home_url = base_url
+        self.crawler.novel_url = novel_url
     # end def
 
     def can_do(self, prop_name):
@@ -125,7 +126,7 @@ def get_novel_info(self):

         self.crawler.initialize()
         self.crawler.scraper.headers['origin'] = self.crawler.home_url
-        self.crawler.scraper.headers['referer'] = self.crawler.home_url + '/'
+        self.crawler.scraper.headers['referer'] = self.crawler.home_url
 
         if self.can_do('login') and self.login_data:
             logger.debug('Login with %s', self.login_data)
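The init_crawler rewrite above replaces an O(n) hostname scan over crawler_list with a single dictionary lookup keyed on 'scheme://hostname/'. A standalone sketch of that lookup; the registry contents are placeholders, and only the derivation of base_url mirrors the diff:

from urllib.parse import urlparse

# Placeholder registry; in lncrawl it is filled by the source loader.
crawler_list = {'https://example.com/': object}
rejected_sources = {'https://blocked.example/': 'takedown notice'}

def resolve_crawler(novel_url: str) -> type:
    parsed_url = urlparse(novel_url)
    # Reduce any novel URL to its source's base URL, e.g.
    # 'https://example.com/novel/123' -> 'https://example.com/'
    base_url = '%s://%s/' % (parsed_url.scheme, parsed_url.hostname)
    if base_url in rejected_sources:
        raise Exception('Source is rejected')
    CrawlerType = crawler_list.get(base_url)
    if not CrawlerType:
        raise Exception('No crawler found')
    return CrawlerType

print(resolve_crawler('https://example.com/novel/123'))  # <class 'object'>

Since home_url is now the registry key and always ends with '/', the referer header in get_novel_info no longer needs the appended slash, which is the change in the hunk just above.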
5 changes: 3 additions & 2 deletions lncrawl/core/crawler.py
@@ -7,6 +7,7 @@
 import random
 import re
 import sys
+from typing import Dict, List
 import unicodedata
 from abc import abstractmethod
 from concurrent.futures import ThreadPoolExecutor
@@ -29,7 +30,7 @@
 NONPRINTABLE_MAPPING = {character: None for character in NONPRINTABLE}
 
 
-class Crawler:
+class Crawler(object):
     '''Blueprint for creating new crawlers'''
 
     def __init__(self) -> None:
@@ -97,7 +98,7 @@ def logout(self) -> None:
     # end def
 
     @abstractmethod
-    def search_novel(self, query):
+    def search_novel(self, query) -> List[Dict[str, str]]:
         '''Gets a list of results matching the given query'''
         pass
     # end def
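The new annotation pins down the search contract: a list of string-to-string dicts. A sketch of a subclass honoring it, assuming the lncrawl package is importable; the base_url value and result keys here are illustrative assumptions, not fixed by the diff:

from typing import Dict, List

from lncrawl.core.crawler import Crawler

class ExampleCrawler(Crawler):
    base_url = 'https://example.com/'  # assumed attribute value

    def search_novel(self, query) -> List[Dict[str, str]]:
        # One dict per hit; the 'title' and 'url' keys are illustrative.
        slug = query.strip().lower().replace(' ', '-')
        return [{
            'title': query.title(),
            'url': self.base_url + 'novel/' + slug,
        }]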
2 changes: 1 addition & 1 deletion lncrawl/core/display.py
@@ -5,7 +5,6 @@
 from colorama import Back, Fore, Style
 
 from ..assets.icons import Icons
-from ..sources import crawler_list
 
 LINE_SIZE = 80
 ENABLE_BANNER = not Icons.isWindows
@@ -119,6 +118,7 @@ def new_version_news(latest):


 def url_supported_list():
+    from .sources import crawler_list
     print('List of %d supported sources:' % len(crawler_list))
     for url in sorted(crawler_list.keys()):
         print(Fore.LIGHTGREEN_EX, Icons.RIGHT_ARROW, url, Fore.RESET)
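Moving the crawler_list import inside url_supported_list defers it to call time, presumably so display can be imported before the sources module (and the registry it fills) is ready, and to avoid a circular import. The pattern in isolation, with a hypothetical module name:

# display-side module; registry.py (hypothetical) imports this file
# at its own top level, so importing it back here must wait.
def url_supported_list():
    from registry import crawler_list  # resolved only when called
    print('List of %d supported sources:' % len(crawler_list))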
10 changes: 5 additions & 5 deletions lncrawl/core/novel_search.py
@@ -6,10 +6,10 @@
 import os
 from concurrent import futures
 
-from tqdm import tqdm
 from slugify import slugify
+from tqdm import tqdm
 
-from ..sources import crawler_list
+from ..core.sources import crawler_list
 
 logger = logging.getLogger(__name__)

@@ -18,9 +18,9 @@

 def get_search_result(app, link, bar):
     try:
-        crawler = crawler_list[link]
-        instance = crawler()
-        instance.home_url = link.strip('/')
+        CrawlerType = crawler_list[link]
+        instance = CrawlerType()
+        instance.home_url = link
         results = instance.search_novel(app.user_input)
         logger.debug(results)
         logger.info('%d results from %s', len(results), link)
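get_search_result now pulls the crawler class straight from the registry and hands the instance the registry key, which already ends in '/', as home_url. The surrounding module fans these calls out through concurrent.futures; a condensed, self-contained sketch with a stub crawler standing in for the real registry and a simplified signature:

from concurrent.futures import ThreadPoolExecutor

class StubCrawler:
    home_url = ''
    def search_novel(self, query):
        return [{'title': query, 'url': self.home_url + 'search?q=' + query}]

crawler_list = {'https://example.com/': StubCrawler}  # placeholder

def get_search_result(user_input, link):
    CrawlerType = crawler_list[link]
    instance = CrawlerType()
    instance.home_url = link
    return instance.search_novel(user_input)

# Fan one query out across all registered sources in parallel.
with ThreadPoolExecutor(max_workers=4) as executor:
    tasks = [executor.submit(get_search_result, 'sword god', link)
             for link in crawler_list]
    for task in tasks:
        print(task.result())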