diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index e69de29..0000000 diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8efe14c --- /dev/null +++ b/Makefile @@ -0,0 +1,15 @@ +clean: + find . -type d -name "__pycache__" -exec rm -rf {} + > /dev/null 2>&1 + find . -type f -name "*.pyc" -exec rm -rf {} + > /dev/null 2>&1 + +isort: + isort -rc metadrive + +lint: + flake8 --show-source metadrive + isort --check-only -rc metadrive --diff + +test: + pytest metadrive + +all: clean lint test \ No newline at end of file diff --git a/metadrive/__init__.py b/metadrive/__init__.py index d4fc1cb..12ebb5a 100644 --- a/metadrive/__init__.py +++ b/metadrive/__init__.py @@ -1,26 +1,22 @@ -import yaml -import pkgutil import inspect -import re +import os +import pkgutil + +from metaform import get_schema -from metadrive import utils +from metadrive import drives, utils, config __all__ = [] -for loader, module_name, is_pkg in pkgutil.walk_packages(__path__): +for loader, module_name, is_pkg in pkgutil.walk_packages(__path__): __all__.append(module_name) _module = loader.find_module(module_name).load_module(module_name) globals()[module_name] = _module -import os, config if not os.path.exists(config.DEFAULT_LOCATION): os.makedirs(config.DEFAULT_LOCATION) -from metaform import get_schema -from metadrive import drives - - def load(data, iterator=False): ''' takes: saved data item @@ -38,6 +34,7 @@ def load(data, iterator=False): elif isinstance(data, dict): return instantiate(data) + def search(source, features: dict): ''' Object discovery by features in source. @@ -51,7 +48,8 @@ def search(source, features: dict): Note: url itself is a feature, too. ''' - raise NotImplemented + raise NotImplementedError + def create(source, features: dict): ''' @@ -64,7 +62,7 @@ def create(source, features: dict): Returns: object's address, and success status and/or errors. ''' - raise NotImplemented + raise NotImplementedError def read(term, limit=None): @@ -89,7 +87,7 @@ def read(term, limit=None): if isinstance(readers, list): for i, reader in enumerate(readers): - print(i+1, reader) + print(i + 1, reader) reader_id = input("Choose reader [1] ") @@ -98,7 +96,7 @@ def read(term, limit=None): else: reader_id = int(reader_id) - if reader_id not in range(1, len(readers)+1): + if reader_id not in range(1, len(readers) + 1): raise Exception("The choice does not exist.") reader_id -= 1 @@ -109,10 +107,8 @@ def read(term, limit=None): else: raise Exception("Reader defined as anything other than string or list is not supported.") - package = utils.ensure_driver_installed(reader) - module = __import__(package) # Get method and package: @@ -129,29 +125,31 @@ def read(term, limit=None): else: return method() + def instantiate(data): _id = data.get('-') _drive = data.get('@') if _id is not None and _drive is not None: - # parsing '@' field: #sample: PyPI::halfbakery_driver==0.0.1:default.api.Topic + # parsing '@' field: + # sample: PyPI::halfbakery_driver==0.0.1:default.api.Topic # TBD: refactor by importing from metatype - packman = _drive.split('::', 1)[0] #sample: PyPI (Conan, NPM, Paket, etc.) - drivespec = _drive.split('::', 1)[-1] #sample: halfbakery_driver==0.0.1:default.api.Topic - driver_name_version = drivespec.split(':',1)[0] #sample: halfbakery_driver==0.0.1 - driver_name = driver_name_version.split('==',1)[0] #sample: halfbakery_driver - driver_version = driver_name_version.rsplit('==',1)[-1] #sample: 0.0.1 - profile_name_pkg_path = drivespec.rsplit(':',1)[-1] #sample: default.api.Topic - profile_name = profile_name_pkg_path.split('.',1)[0] #sample: default - pkg_path = profile_name_pkg_path.split('.',1)[-1] #sample: api.Topic + # packman = _drive.split('::', 1)[0] # sample: PyPI (Conan, NPM, Paket, etc.) + drivespec = _drive.split('::', 1)[-1] # sample: halfbakery_driver==0.0.1:default.api.Topic + driver_name_version = drivespec.split(':', 1)[0] # sample: halfbakery_driver==0.0.1 + driver_name = driver_name_version.split('==', 1)[0] # sample: halfbakery_driver + # driver_version = driver_name_version.rsplit('==', 1)[-1] # sample: 0.0.1 + profile_name_pkg_path = drivespec.rsplit(':', 1)[-1] # sample: default.api.Topic + profile_name = profile_name_pkg_path.split('.', 1)[0] # sample: default + # pkg_path = profile_name_pkg_path.split('.', 1)[-1] # sample: api.Topic # TBD: refactor by reusing metadrive/api.py# around 90 line ndriver = driver_name.replace('-', '_') - module = __import__(ndriver) + # module = __import__(ndriver) api = __import__('{}.api'.format(ndriver), fromlist=[ndriver]) - classname = _drive.rsplit('.',1)[-1] + classname = _drive.rsplit('.', 1)[-1] Klass = getattr(api, classname) item = Klass(data) diff --git a/metadrive/_os_static/icons/item.png b/metadrive/_os_static/icons/item.png deleted file mode 100644 index 8bce3a1..0000000 Binary files a/metadrive/_os_static/icons/item.png and /dev/null differ diff --git a/metadrive/_requests.py b/metadrive/_requests.py index de7f2bb..6fe27f7 100644 --- a/metadrive/_requests.py +++ b/metadrive/_requests.py @@ -1,16 +1,17 @@ -import os import inspect -import pathlib +import os + import requests -from metadrive import config -from metadrive import utils -from metadrive import mixins + +from metadrive import config, mixins, utils SUBTOOL = os.path.basename(__file__).split('.py')[0] + def get_session(*args, **kwargs): return requests.Session(*args, **kwargs) + class RequestsDrive(requests.Session): def __init__(self, *args, **kwargs): @@ -30,7 +31,6 @@ def get(self, *args, **kwargs): 'https': 'socks5h://{}'.format(socks)} kwargs.update({'proxies': proxies}) - self.response = super().get(*args, **kwargs) if hasattr(self, 'profile'): @@ -39,7 +39,7 @@ def get(self, *args, **kwargs): session_prefix_file = os.path.join( SUBTOOL, '{drive_id}/cookies.json'.format( drive_id=self.profile - )) + )) utils.save_session_data(session_prefix_file, session_data) @@ -53,7 +53,7 @@ def get_drive( recreate_profile=False, proxies='default'): - ## ----------- TO MOVE TO MIXIN --------------- # + # ----------- TO MOVE TO MIXIN --------------- # proxy = mixins.set_proxies(proxies) local = mixins.init_profile(profile, porfiles_dir, recreate_profile) @@ -66,13 +66,12 @@ def get_drive( else: drive = None - if drive is not None: session_prefix_file = os.path.join( drive.subtool, '{drive_id}/cookies.json'.format( drive_id=profile - )) + )) if os.path.exists(os.path.join(config.SESSIONS_DIR, session_prefix_file)): session_data = utils.load_session_data(session_prefix_file) @@ -92,7 +91,6 @@ def get_drive( {'proxy': proxy} ) - # LATER MOVE TO MIXIN drive.caller_module = inspect.getmodule(inspect.currentframe().f_back) diff --git a/metadrive/_selenium.py b/metadrive/_selenium.py index c8f44d1..b0d4d15 100644 --- a/metadrive/_selenium.py +++ b/metadrive/_selenium.py @@ -6,17 +6,18 @@ # To create selenium driver may use something like: docker run -d -p 4444:4444 selenium/standalone-chrome:3.7.1-beryllium ''' -import os import inspect +import os import pathlib +from deprecated import deprecated from selenium import webdriver -from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.chrome.options import Options -from selenium.webdriver.common.action_chains import ActionChains from metadrive import config -from deprecated import deprecated + +# from selenium.webdriver.support.wait import WebDriverWait + class TabsMixin: @@ -29,6 +30,7 @@ def open_tab(self, name, url=None): raise Exception('Tab not found, and url not provided.') else: self.switch_to.window(self.tabs[name]) + def current_tab(self): return next(filter(lambda x: x[1] == self.current_window_handle, self.tabs.items())) @@ -47,6 +49,7 @@ def __init__(self, *args, **kwargs): self.tabs = {'default': self.current_window_handle} self.metaname = '' + class Remote(webdriver.Remote, TabsMixin): def __init__(self, *args, **kwargs): @@ -54,18 +57,18 @@ def __init__(self, *args, **kwargs): self.tabs = {'default': self.current_window_handle} self.metaname = '' -def get_drive( - driver_location=config.CHROME_DRIVER, - profile='default', - porfiles_dir='.metadrive/sessions/_selenium', - headless=config.SELENIUM.get('headless') == 'True' or False, - load_images=True, - load_adblocker=True, - recreate_profile=False, - download_to='', - proxies='default', - ): +def get_drive( + driver_location=config.CHROME_DRIVER, + profile='default', + porfiles_dir='.metadrive/sessions/_selenium', + headless=config.SELENIUM.get('headless') == 'True' or False, + load_images=True, + load_adblocker=True, + recreate_profile=False, + download_to='', + proxies='default', +): ''' Gets a new browser, with session in specific directory. @@ -143,9 +146,8 @@ def get_drive( # OPTIONS.add_argument('--ssl-protocol=any') # OPTIONS.add_argument('--web-security=no') - PREFERENCES = {} - #OPTIONS.experimental_options["prefs"] = PREFERENCES + # OPTIONS.experimental_options["prefs"] = PREFERENCES if headless: OPTIONS.add_argument('--headless') @@ -159,7 +161,7 @@ def get_drive( if download_to: PREFERENCES.update( - {'download.default_directory' : download_to, + {'download.default_directory': download_to, 'download.prompt_for_download': False, 'download.directory_upgrade': True, 'safebrowsing.enabled': False, @@ -172,7 +174,7 @@ def get_drive( # OPTIONS.add_extension(os.path.join(os.getcwd(), 'subtools/extensions/ghostery.crx')) OPTIONS.add_extension( os.path.join(os.getcwd(), 'subtools/selenium/extensions/ublock_origin.crx')) - except: + except Exception: pass # ------------- INITIALIZATION SECTION ------------ # @@ -186,7 +188,6 @@ def get_drive( CHROME_DRIVER_LOCATION = driver_location local = True - if local: profile_path = os.path.join( str(pathlib.Path.home()), @@ -196,7 +197,7 @@ def get_drive( if profile_path is not None: - OPTIONS.add_argument("--user-data-dir={}".format(profile_path)); + OPTIONS.add_argument("--user-data-dir={}".format(profile_path)) if not profile_path: os.makedirs(profile_path) @@ -247,18 +248,19 @@ def get_drive( return browser + @deprecated(reason="Use get_drive() instead.") def get_driver( - driver_location=config.CHROME_DRIVER, - profile='default', - porfiles_dir='.metadrive/sessions/_selenium', - headless=False, - load_images=True, - load_adblocker=True, - recreate_profile=False, - download_to='', - proxies='default', - ): + driver_location=config.CHROME_DRIVER, + profile='default', + porfiles_dir='.metadrive/sessions/_selenium', + headless=False, + load_images=True, + load_adblocker=True, + recreate_profile=False, + download_to='', + proxies='default', +): return get_drive( driver_location=driver_location, @@ -285,15 +287,15 @@ def save_as(element, driver): from PIL import Image from io import BytesIO - as__ = Image.open( + as__ = Image.open( # noqa BytesIO( base64.b64decode( - 'iVBORw0KGgoAAAANSUhEUgAAACMAAAAQCAIAAAATVVENAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAB3RJTUUH4gsTAjMlHs+8UAAAAB1pVFh0Q29tbWVudAAAAAAAQ3JlYXRlZCB3aXRoIEdJTVBkLmUHAAABrElEQVQ4y2P8//8/A10AEwO9wKhNlAAWZM7SFStXrVn78tUrXl5eTzfXvOwsFhYWiPiSZcs/ffrExsbm7elRUlhAjlX/kcCOXbsfPX7879+/e/fvewcELVm2/P///w8ePDSxsrl3//7///+/fft26cqV/2QBlNBzd3WRlZFhZGRUVFAIDw05efo0AwMDCwvL////r9+4+fnLF05OTl1tbfJCjxE5Px06cnThkiVPnjxlYGD4/uO7nKzckvlzGRgY9h04uHzVqsuXr6ioqKQlJ9nZWFMUem/fvTM0tzx4+Mjfv3////+/eOmy6PhEZO///Plz1dq1RhZWX758oSj0vn/7/u/fPw01VSYmpg8fP67buBEi/vDho+MnT/78+ZONjU1QQJCRkZGJmZmBgeHYiRPzFy2GqHnz9m3vhIlv3r6FcOcvWnzsxAmcaU9aWqogJzs5I0tQUICPl8/R3v7kqdMMDAw/f/2cOn3m/QcPGJmYpKUk+7o6OTk4GBgYLl66vH3XrsS4WAYGhg8fPixauszf10dEWJiBgWH9pk2ebm5WFhY442m0jBi0NgEAiOU/HUCPdjUAAAAASUVORK5CYII='))) + 'iVBORw0KGgoAAAANSUhEUgAAACMAAAAQCAIAAAATVVENAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAB3RJTUUH4gsTAjMlHs+8UAAAAB1pVFh0Q29tbWVudAAAAAAAQ3JlYXRlZCB3aXRoIEdJTVBkLmUHAAABrElEQVQ4y2P8//8/A10AEwO9wKhNlAAWZM7SFStXrVn78tUrXl5eTzfXvOwsFhYWiPiSZcs/ffrExsbm7elRUlhAjlX/kcCOXbsfPX7879+/e/fvewcELVm2/P///w8ePDSxsrl3//7///+/fft26cqV/2QBlNBzd3WRlZFhZGRUVFAIDw05efo0AwMDCwvL////r9+4+fnLF05OTl1tbfJCjxE5Px06cnThkiVPnjxlYGD4/uO7nKzckvlzGRgY9h04uHzVqsuXr6ioqKQlJ9nZWFMUem/fvTM0tzx4+Mjfv3////+/eOmy6PhEZO///Plz1dq1RhZWX758oSj0vn/7/u/fPw01VSYmpg8fP67buBEi/vDho+MnT/78+ZONjU1QQJCRkZGJmZmBgeHYiRPzFy2GqHnz9m3vhIlv3r6FcOcvWnzsxAmcaU9aWqogJzs5I0tQUICPl8/R3v7kqdMMDAw/f/2cOn3m/QcPGJmYpKUk+7o6OTk4GBgYLl66vH3XrsS4WAYGhg8fPixauszf10dEWJiBgWH9pk2ebm5WFhY442m0jBi0NgEAiOU/HUCPdjUAAAAASUVORK5CYII='))) # noqa position = pyautogui.locateOnScreen(as__) if position: print(position) - pyautogui.click(x=position[0],y=position[1]) + pyautogui.click(x=position[0], y=position[1]) else: print('Could find "Save as ..." position. Try local, non-headless.') diff --git a/metadrive/_xarray.py b/metadrive/_xarray.py index 89786a2..7ad9e51 100644 --- a/metadrive/_xarray.py +++ b/metadrive/_xarray.py @@ -1,9 +1,10 @@ +import inspect import os + # import xarray import pandas -import inspect -from metadrive import mixins +from metadrive import mixins SUBTOOL = os.path.basename(__file__).split('.py')[0] @@ -25,8 +26,8 @@ def get_drive( recreate_profile=False, proxies='default'): - proxy = mixins.set_proxies(proxies) - local = mixins.init_profile(profile, porfiles_dir, recreate_profile) + mixins.set_proxies(proxies) + mixins.init_profile(profile, porfiles_dir, recreate_profile) drive = XarrayDrive() drive.subtool = SUBTOOL diff --git a/metadrive/auth.py b/metadrive/auth.py index 31fa6c8..1541292 100644 --- a/metadrive/auth.py +++ b/metadrive/auth.py @@ -1,8 +1,10 @@ import random + import requests -from metadrive._requests import get_session from metadrive import utils +from metadrive._requests import get_session + class UserAgents: @@ -17,6 +19,7 @@ def random_android(self): return user_agent + class RequestsCookieAuthentication: def __init__(self, raw_cookie, key_name, proxies={}): @@ -45,14 +48,14 @@ def authenticate(self): if credential: session.headers.update(dict({ - 'content-type':'text/plain', + 'content-type': 'text/plain', }, **credential)) else: raise Warning("Credential is not provided, some data may not be retrieved.") else: session.headers.update({ - 'content-type':'text/plain', + 'content-type': 'text/plain', 'cookie': self.raw_cookie }) diff --git a/metadrive/cli.py b/metadrive/cli.py index 1cdf330..0cbd859 100644 --- a/metadrive/cli.py +++ b/metadrive/cli.py @@ -1,16 +1,11 @@ +import os +from urllib.parse import urlparse + import click -import importlib -import json + import metadrive from metadrive import utils -from typology.utils import slug -from metawiki import name_to_url -import os -from urllib.parse import urlparse -from metadrive.config import ( - ENSURE_SITES, - SITES_DIR, -) +from metadrive.config import ENSURE_SITES, SITES_DIR from metadrive.mnt import mount # Cause ecryptfs supports max 143 chars. @@ -47,7 +42,7 @@ def connect(resource, mountpoint=None, user=None, period=900): # 2. Checking for driver with it. index = drivers.index() - results = list(filter(lambda x: x['domain']==default_domain, index)) + results = list(filter(lambda x: x['domain'] == default_domain, index)) if not results: print("No drivers found for {}.".format(resource)) @@ -70,14 +65,14 @@ def connect(resource, mountpoint=None, user=None, period=900): try: import pkg_resources package_version = pkg_resources.require(first_driver.get('package'))[0].version - except: + except Exception: # print("The package not yet installed. Latest package found.") package_version = first_driver.get('info')['version'] print("-================================================-\n[*] using: [PyPI:{packname}=={version}]".format( packname=first_driver.get('package'), - version=package_version #'>'+first_driver.get('info')['version'] - ), + version=package_version # '>'+first_driver.get('info')['version'] + ), ) if mountpoint is None: @@ -92,7 +87,7 @@ def connect(resource, mountpoint=None, user=None, period=900): ) module = __import__(package) - api = importlib.import_module('{}.api'.format(package)) + # api = importlib.import_module('{}.api'.format(package)) # print('\nTop level methods:\n') # for met in dir(module): @@ -106,7 +101,7 @@ def connect(resource, mountpoint=None, user=None, period=900): # print(' - ', cls) # - drive_name = 'default' #input("Enter the name of drive [default]: ") or 'default' + drive_name = 'default' # input("Enter the name of drive [default]: ") or 'default' # drive = get_drive( # profile=profile, @@ -124,7 +119,6 @@ def connect(resource, mountpoint=None, user=None, period=900): ) drive = metadrive.drives.get(drive_fullname, interactive=False) - mountpoint = '{}:{}'.format(mountpoint, drive_name) if not os.path.exists(mountpoint): os.makedirs(mountpoint) @@ -141,7 +135,6 @@ def connect(resource, mountpoint=None, user=None, period=900): print("Pass '--user {}' next time, to reuse the session.".format(drive_name)) def sync(): - print("[*] mount: {}\n-================================================-".format(mountpoint)) import inspect if 'period' in inspect.getfullargspec(module._harvest).args: module._harvest(drive=drive, period=period) @@ -149,9 +142,10 @@ def sync(): module._harvest(drive=drive) from multiprocessing import Process - syncer = Process( target=sync ) + syncer = Process(target=sync) syncer.daemon = True syncer.start() + print("[*] mount: {}\n-================================================-".format(mountpoint)) mount(savedir, mountpoint) syncer.terminate() diff --git a/metadrive/config.py b/metadrive/config.py index 5f2e54f..f347196 100644 --- a/metadrive/config.py +++ b/metadrive/config.py @@ -1,15 +1,17 @@ -import os +import configparser import imp +import os from pathlib import Path -import configparser -import requests + import gpgrecord +import requests + config = configparser.ConfigParser() INSTALLED = imp.find_module('metadrive')[1] HOME = str(Path.home()) -DEFAULT_LOCATION = os.path.join(HOME,'.metadrive') +DEFAULT_LOCATION = os.path.join(HOME, '.metadrive') CONFIG_LOCATION = os.path.join(DEFAULT_LOCATION, 'config') CREDENTIALS_DIR = os.path.join(DEFAULT_LOCATION, '-/+') SESSIONS_DIR = os.path.join(DEFAULT_LOCATION, 'sessions') @@ -34,30 +36,35 @@ def ENSURE_SESSIONS(): if subtool != '__init__': os.makedirs(subtool_profiles_path) + ENSURE_SESSIONS() + def ENSURE_DATA(): if not os.path.exists(DATA_DIR): os.makedirs(DATA_DIR) + ENSURE_DATA() + def ENSURE_SITES(): if not os.path.exists(SITES_DIR): os.makedirs(SITES_DIR) + ENSURE_SITES() if not os.path.exists(CONFIG_LOCATION): - username = input("Type your GitHub username: ") + username = "seva" # input("Type your GitHub username: ") config['GITHUB'] = {'USERNAME': username} config['PROXIES'] = {'http': '', 'https': ''} config['DRIVERS'] = {'auto_upgrade': False} config['SELENIUM'] = {'headless': False} config['DRIVER_BACKENDS'] = { - 'CHROME': '/usr/bin/chromedriver' # e.g., or http://0.0.0.0:4444/wd/hub, etc. + 'CHROME': '/usr/local/bin/chromedriver' # e.g., or http://0.0.0.0:4444/wd/hub, etc. } with open(CONFIG_LOCATION, 'w') as configfile: @@ -86,7 +93,6 @@ def ENSURE_REPO(): while not requests.get('https://github.com/{}/-'.format(GITHUB_USER)).ok: input("Please, create repository named `-` on your GitHub. Type [ENTER] to continue... ") - if os.path.exists(REPO_PATH): # git pull # os.system('cd {}; git pull'.format(REPO_PATH)) @@ -102,6 +108,7 @@ def ENSURE_REPO(): REPO_PATH )) + def ENSURE_GPG(): config.read(CONFIG_LOCATION) if 'GPG' in config.keys(): @@ -112,7 +119,7 @@ def ENSURE_GPG(): for i, key in enumerate(KEY_LIST): print('{id}. {uid} {fingerprint}'.format( - id=i+1, + id=i + 1, uid=key['uids'], fingerprint=key['fingerprint'] )) @@ -128,13 +135,15 @@ def ENSURE_GPG(): return GPG_KEY + def ENSURE_PROXIES(): config.read(CONFIG_LOCATION) if 'PROXIES' in config.keys(): - return {key: 'socks5h://'+config['PROXIES'][key] or None + return {key: 'socks5h://' + config['PROXIES'][key] or None for key in config['PROXIES'] if config['PROXIES'][key]} - SOCKS5 = input('Type-in default socks5 proxy (e.g., 127.0.0.1:9999) (leave emtpy to default to direct connections) [ENTER]: ') + SOCKS5 = input( + 'Type-in default socks5 proxy (e.g., 127.0.0.1:9999) (leave emtpy to default to direct connections) [ENTER]: ') config['PROXIES'] = { 'http': SOCKS5, @@ -144,5 +153,5 @@ def ENSURE_PROXIES(): with open(CONFIG_LOCATION, 'w') as configfile: config.write(configfile) - return {key: 'socks5h://'+config['PROXIES'][key] or None + return {key: 'socks5h://' + config['PROXIES'][key] or None for key in config['PROXIES'] if config['PROXIES'][key]} diff --git a/metadrive/drivers.py b/metadrive/drivers.py index 82980c7..23d5c8d 100644 --- a/metadrive/drivers.py +++ b/metadrive/drivers.py @@ -1,13 +1,13 @@ import io +import json import os +import tarfile +import urllib + import bs4 +import requests import tqdm import yaml -import json -import time -import urllib -import tarfile -import requests from metadrive.config import KNOWN_DRIVERS @@ -31,7 +31,7 @@ def auto_discover(refresh=True): if total_length is None: f.write(response.content) else: - total_length = 10000000 #int(total_length) + total_length = 10000000 # int(total_length) with tqdm.tqdm(total=total_length) as pbar: for data in response.iter_content(chunk_size=4096): @@ -71,6 +71,7 @@ def auto_discover(refresh=True): # This is for retrieving setup.py details. from metadrive.utils import stdoutIO import setuptools + def setup(**kwargs): print(json.dumps(kwargs)) setuptools.setup = setup @@ -96,8 +97,8 @@ def setup(**kwargs): response = requests.get(last_version.attrs['href']) try: - tar = tarfile.open(mode= "r:gz", fileobj = io.BytesIO(response.content)) - except: + tar = tarfile.open(mode="r:gz", fileobj=io.BytesIO(response.content)) + except Exception: tar = None if tar is not None: @@ -112,7 +113,7 @@ def setup(**kwargs): for line in text.split('\n'): if '__site_url__' in line: if '=' in line: - site = line.split('=',1)[-1].strip()[1:-1] + site = line.split('=', 1)[-1].strip()[1:-1] if site: if site.startswith('http'): domain = urllib.parse.urlparse(site).hostname @@ -147,5 +148,6 @@ def setup(**kwargs): return site_drivers + def index(): return auto_discover(refresh=False) diff --git a/metadrive/drives.py b/metadrive/drives.py index 93bd4d2..9d6e2b5 100644 --- a/metadrive/drives.py +++ b/metadrive/drives.py @@ -1,18 +1,16 @@ import os -from metadrive.config import ( - INSTALLED, - SESSIONS_DIR, - SUBTOOLS -) -from metadrive.utils import find_drivers -from metadrive import utils + import pkg_resources +from metadrive import utils +from metadrive.config import SESSIONS_DIR, SUBTOOLS + # This package manages what profiles are created, and actually the sessions on disk, # rather than active sessions on API. ACTIVE = {} + def all(): ''' second coordinate uniquely identifies drives @@ -21,25 +19,13 @@ def all(): drives_map = [] for subtool in SUBTOOLS: - subtool_dir = os.path.join(SESSIONS_DIR,subtool) + subtool_dir = os.path.join(SESSIONS_DIR, subtool) for drive_dir in os.listdir(subtool_dir): drives_map.append((subtool, drive_dir, 'ALIVE' if ACTIVE.get(drive_dir) else 'DEAD')) return drives_map -def next_string(s): - a1 = range(65, 91) # capital letters - a2 = range(97, 123) # letters - a3 = range(48, 58) # numbers - char = ord(s[-1]) - for a in [a1, a2, a3]: - if char in a: - if char + 1 in a: - return s[:-1] + chr(char + 1) - else: - ns = next_string(s[:-1]) if s[:-1] else chr(a[0]) - return ns + chr(a[0]) def get(driver_or_drive, interactive=False): @@ -47,12 +33,11 @@ def get(driver_or_drive, interactive=False): return ACTIVE[driver_or_drive] if ':' in driver_or_drive: - driver, drive_id = driver_or_drive.split(':',1) + driver, drive_id = driver_or_drive.split(':', 1) drive = driver_or_drive else: driver = driver_or_drive drive = None - drive_id = None ndriver = driver.replace('-', '_') package = utils.ensure_driver_installed(driver_name='pypi:{}'.format(ndriver)) @@ -61,8 +46,6 @@ def get(driver_or_drive, interactive=False): d = all() drives = list(zip(*d))[1] if d else [] - ids = sorted([d.split(':',1)[-1] for d in drives if ':' in d]) - if drive in drives: drive_obj = module.get_drive(profile=drive) elif drive is not None: @@ -71,11 +54,6 @@ def get(driver_or_drive, interactive=False): else: drive_obj = module.get_drive(profile=drive) else: - if ids: - i = ids[-1] - else: - i = '0' - import inspect if interactive and 'interactive' in inspect.getfullargspec(module._login).args: drive_obj = module._login(interactive=interactive) @@ -83,7 +61,6 @@ def get(driver_or_drive, interactive=False): drive = '{}:{}'.format(driver, drive) else: drive = '{}:{}'.format(driver, 'default') - # drive = '{}:{}'.format(driver, next_string(i)) drive_obj = module.get_drive(profile=drive) ACTIVE[drive] = drive_obj @@ -94,9 +71,9 @@ def get(driver_or_drive, interactive=False): # TODO: refactor with api.py#creating-informative-drive drive_obj.spec = '{packman}::{driver}=={version}:{profile}.{namespace}'.format( packman='PyPI', - driver=drive_obj.drive_id.split(':',1)[0], #.replace('-', '_'), + driver=drive_obj.drive_id.split(':', 1)[0], # .replace('-', '_'), version=driver_version, - profile=drive_obj.drive_id.rsplit(':',1)[-1], + profile=drive_obj.drive_id.rsplit(':', 1)[-1], namespace='api.', # namspace not present, because it's a drive, but we prepare based on drivers package convention, the .api. # then, in packages we only have to provide type(self).__name__, e.g.: diff --git a/metadrive/helpers.py b/metadrive/helpers.py index efdb53b..9acc662 100644 --- a/metadrive/helpers.py +++ b/metadrive/helpers.py @@ -1,6 +1,8 @@ -import yaml import inspect +import yaml + + def get_actions(cls): ''' Convenience function to create summaries of actions. @@ -11,17 +13,19 @@ def get_actions(cls): if not k.startswith('__'): sig = inspect.signature(v) actions[k] = \ - '<'+', '.join([ - p + str(sig.parameters[p].annotation.__name__ != '_empty' and ': '+sig.parameters[p].annotation.__name__ or '') - for p in sig.parameters - if sig.parameters[p].name != 'self' - ])+'>' + '<' + ', '.join([ + p + str(sig.parameters[p].annotation.__name__ != '_empty' and ': ' + + sig.parameters[p].annotation.__name__ or '') + for p in sig.parameters + if sig.parameters[p].name != 'self' + ]) + '>' if v.__doc__: actions[k] += ' - ' + v.__doc__.strip() return actions + def print_actions(cls): actions = get_actions(cls) diff --git a/metadrive/mixins.py b/metadrive/mixins.py index 92a6bbc..8c6bec6 100644 --- a/metadrive/mixins.py +++ b/metadrive/mixins.py @@ -1,7 +1,9 @@ import os import pathlib + from metadrive import config + def set_proxies(proxies): if proxies == 'default': @@ -37,6 +39,7 @@ def set_proxies(proxies): return proxy + def init_profile(profile, porfiles_dir, recreate_profile): local = True diff --git a/metadrive/mnt.py b/metadrive/mnt.py index 53537fa..83e73b5 100644 --- a/metadrive/mnt.py +++ b/metadrive/mnt.py @@ -1,8 +1,8 @@ from __future__ import with_statement +import errno import os import sys -import errno from fuse import FUSE, FuseOSError, Operations @@ -39,8 +39,12 @@ def chown(self, path, uid, gid): def getattr(self, path, fh=None): full_path = self._full_path(path) st = os.lstat(full_path) - return dict((key, getattr(st, key)) for key in ('st_atime', 'st_ctime', - 'st_gid', 'st_mode', 'st_mtime', 'st_nlink', 'st_size', 'st_uid')) + return dict((key, getattr(st, key)) for key in ( + 'st_atime', 'st_ctime', + 'st_gid', 'st_mode', + 'st_mtime', 'st_nlink', + 'st_size', 'st_uid' + )) def readdir(self, path, fh): full_path = self._full_path(path) @@ -72,9 +76,13 @@ def mkdir(self, path, mode): def statfs(self, path): full_path = self._full_path(path) stv = os.statvfs(full_path) - return dict((key, getattr(stv, key)) for key in ('f_bavail', 'f_bfree', - 'f_blocks', 'f_bsize', 'f_favail', 'f_ffree', 'f_files', 'f_flag', - 'f_frsize', 'f_namemax')) + return dict((key, getattr(stv, key)) for key in ( + 'f_bavail', 'f_bfree', + 'f_blocks', 'f_bsize', + 'f_favail', 'f_ffree', + 'f_files', 'f_flag', + 'f_frsize', 'f_namemax' + )) def unlink(self, path): return os.unlink(self._full_path(path)) @@ -128,6 +136,7 @@ def fsync(self, path, fdatasync, fh): def mount(root, mountpoint): FUSE(Passthrough(root), mountpoint, nothreads=True, foreground=True) + if __name__ == '__main__': mount(sys.argv[1], sys.argv[2]) diff --git a/metadrive/tasks.py b/metadrive/tasks.py deleted file mode 100644 index 04360ea..0000000 --- a/metadrive/tasks.py +++ /dev/null @@ -1,7 +0,0 @@ -from celery import Celery - -app = Celery('metadrive.tasks', broker='pyamqp://guest@localhost//') - -@app.task -def add(x, y): - return x + y diff --git a/metadrive/tests/test_wrapper.py b/metadrive/tests/test_wrapper.py index 0ba576d..008f735 100644 --- a/metadrive/tests/test_wrapper.py +++ b/metadrive/tests/test_wrapper.py @@ -1,5 +1,6 @@ import metadrive + def test_read_table(): drive = metadrive.drives.get('table-driver:default') from table_driver.api import Row @@ -16,4 +17,3 @@ def test_read_table(): del expect['@'] del result['@'] assert expect == result - diff --git a/metadrive/utils.py b/metadrive/utils.py index e86b9ce..518cfc0 100644 --- a/metadrive/utils.py +++ b/metadrive/utils.py @@ -1,11 +1,14 @@ +import contextlib +import importlib +import json import os import re -import yaml -import json -import requests +import sys +from io import StringIO + import gpgrecord -import importlib import pkg_resources +import yaml from metadrive import config @@ -57,6 +60,7 @@ def get_metaname(namespace, anchor=None): main=anchor if anchor else MAIN ) + def get_credential(namespace): ''' namespace: -- service name, by directory @@ -74,9 +78,10 @@ def get_credential(namespace): return credential - except: + except Exception: return None + def set_credential(namespace, credential): ''' namespace: -- service name, by directory @@ -100,12 +105,12 @@ def set_credential(namespace, credential): cont=yaml.dump(encrypted_credential) ) - repo = config.ENSURE_REPO() + # repo = config.ENSURE_REPO() with open( os.path.join( config.CREDENTIALS_DIR, - namespace+'.md'), 'w') as f: + namespace + '.md'), 'w') as f: f.write(content) os.system('cd {}; git add .; git commit -m "update"; git push origin master'.format( @@ -113,6 +118,7 @@ def set_credential(namespace, credential): return + def get_or_ask_credentials(namespace, variables, ask_refresh=False): credential = get_credential(namespace) @@ -142,6 +148,7 @@ def get_or_ask_credentials(namespace, variables, ask_refresh=False): else: return credential + def load_session_data(namespace): session_path = os.path.join(config.SESSIONS_DIR, namespace) if os.path.exists(session_path): @@ -150,16 +157,18 @@ def load_session_data(namespace): else: return {} + def save_session_data(namespace, session_data): session_path = os.path.join(config.SESSIONS_DIR, namespace) json.dump(session_data, open(session_path, 'w')) + def ensure_driver_installed(driver_name): reader = driver_name SUPPORTED_PACKAGE_MANAGERS = ['pypi'] - if reader.lower().split(':',1)[0] not in SUPPORTED_PACKAGE_MANAGERS: + if reader.lower().split(':', 1)[0] not in SUPPORTED_PACKAGE_MANAGERS: raise Exception( "Unknown package manager. " + "Make sure the reader you chose starts with one of these: " + @@ -193,16 +202,15 @@ def ensure_driver_installed(driver_name): # TBD: unify the way we refer to package manager, use '::' in all cases packman, package = package_name.split(':') - # Make sure we have that package installed. spec = importlib.util.find_spec(package) if spec is None: # answer = input(package +" is not installed. Install it? [Y/n] ") # if answer in ['y', 'Y', '']: try: - #easy_install.main( ["-U", package_name] ) + # easy_install.main( ["-U", package_name] ) os.system('pip install --no-input -U {} --no-cache'.format(package)) - except SystemExit as e: + except SystemExit: pass # else: # raise Exception(package_name +" is required. Install it and run again.") @@ -225,40 +233,36 @@ def get_lastest_version_number(package_name): def cmp_version(version1, version2): def norm(v): - return [int(x) for x in re.sub(r'(\.0+)*$','', v).split(".")] + return [int(x) for x in re.sub(r'(\.0+)*$', '', v).split(".")] a, b = norm(version1), norm(version2) return (a > b) - (a < b) if latest_version is not None: if cmp_version(installed_version, latest_version) < 0: - print('You are running {}=={}'.format(package,installed_version)+", but there is newer ({}) version.".format(latest_version)) + print('You are running {}=={}'.format(package, installed_version) + + ", but there is newer ({}) version.".format(latest_version)) if config.AUTO_UPGRADE_DRIVERS is None: answer = input("Upgrade it? [y/N] ") if answer in ['y', 'Y']: try: os.system('pip install --no-input -U {} --no-cache'.format(package)) - except SystemExit as e: + except SystemExit: pass elif config.AUTO_UPGRADE_DRIVERS: try: os.system('pip install --no-input -U {} --no-cache'.format(package)) - except SystemExit as e: + except SystemExit: pass - else: # config.AUTO_UPGRADE_DRIVERS == False: + else: # config.AUTO_UPGRADE_DRIVERS == False: pass return package - -import sys -from io import StringIO -import contextlib - @contextlib.contextmanager def stdoutIO(stdout=None): old = sys.stdout @@ -267,4 +271,3 @@ def stdoutIO(stdout=None): sys.stdout = stdout yield stdout sys.stdout = old - diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index d6e1198..0000000 --- a/requirements.txt +++ /dev/null @@ -1 +0,0 @@ --e . diff --git a/requirements/dev.txt b/requirements/dev.txt new file mode 100644 index 0000000..892a97d --- /dev/null +++ b/requirements/dev.txt @@ -0,0 +1,7 @@ +flake8==3.7.7 +isort==4.3.21 +ipdb==0.12 + +-r test.txt + +-e . diff --git a/requirements/test.txt b/requirements/test.txt new file mode 100644 index 0000000..8f1ce63 --- /dev/null +++ b/requirements/test.txt @@ -0,0 +1 @@ +pytest==4.6.3 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..652f381 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,13 @@ +[isort] +line_length=99 +multi_line_output=5 +skip_glob=**/migrations/*.py +not_skip=__init__.py +known_first_party=metadrive +include_trailing_comma=True +default_section=THIRDPARTY + +[flake8] +max-line-length = 120 +exclude = .git,*/migrations/* +ignore = D100,D101,D102,D103,D104,D105,D105,D107,D200,D204,D205,D400,D401,C812,W504 diff --git a/setup.py b/setup.py index f6c5551..de2f4c1 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,5 @@ # Copyright (c) 2018 WeFindX Foundation, CLG. # All Rights Reserved. - -import os from setuptools import find_packages, setup try: @@ -21,54 +19,31 @@ author='Mindey', author_email='mindey@qq.com', license='Apache 2.0', - packages = find_packages(exclude=['docs', 'tests*']), + packages=find_packages(exclude=['docs', 'tests*']), install_requires=[ - 'Deprecated==1.2.5', - 'fusepy==3.0.1', - # 'PyGithub==1.43.7', - # 'pygithub3==0.5.1', - 'aiofiles==0.4.0', - 'apiage==0.1.4', - 'asyncio==3.4.3', - 'bs4==0.0.1', - 'celery==4.2.1', + 'deprecated==1.2.5', 'click==7.0', - 'feedparser==5.2.1', - 'gitpython==2.1.11', - 'gpgrecord==0.0.4', - 'ipython==7.3.0', - 'jinja2==2.10.1', + 'fusepy==3.0.1', + 'tqdm==4.31.1', + 'yolk3k==0.9', # TODO 'metatype', 'metawiki', 'metaform', - 'pypandoc==1.4', # only for converting README.md - 'paramiko==2.4.2', - 'pyautogui==0.9.42', - 'pymongo==3.7.2', - 'pysocks==1.6.8', - 'pytest==4.4.1', - 'python-dateutil==2.8.0', - 'python3-xlib==0.15', - 'requests==2.21.0', - 'selenium==3.141.0', - 'slumber==0.7.1', - 'Sphinx==2.0.1', - 'tqdm==4.31.1', 'typology', - 'yolk3k==0.9', - 'xarray==0.12.1', - 'urllib3==1.24.3' # not sure if necessary + + # 'selenium==3.141.0', # for _selenuim + # 'xarray==0.12.1', # for _xarray ], - extras_require = { + extras_require={ 'test': ['coverage', 'pytest', 'pytest-cov'], }, zip_safe=False, - entry_points = { + entry_points={ 'console_scripts': [ 'connect=metadrive.cli:connect' ], }, - package_data = { + package_data={ 'metadrive': [] }