diff --git a/README.md b/README.md index 798c2c8..b166dee 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,18 @@ Simply run: $ pipsi install gcdu +You need to authenticate in the project using gcloud sdk: + + $ gcloud auth login + $ gcloud config set project PROJECT_ID + +Or exporting the environment variable: + + $ export GOOGLE_APPLICATION_CREDENTIALS='key.json' + +More help in this link https://developers.google.com/identity/protocols/application-default-credentials + +--- # Usage @@ -16,3 +28,11 @@ To use it: $ gcdu --help +Export command: + + $ gcdu export -p [project] -n [namespace] -k [comma separated list of datastore kinds] + +Import command: + + $ gcdu import -p [project] -n [namespace] -k [comma separated list of datastore kinds] + diff --git a/gcdu/__version__.py b/gcdu/__version__.py index a85d36d..2cf0804 100644 --- a/gcdu/__version__.py +++ b/gcdu/__version__.py @@ -8,4 +8,4 @@ # # Google Cloud Datastore Utils -__version__ = '0.0.3' +__version__ = '0.0.4' diff --git a/gcdu/cli.py b/gcdu/cli.py index a0e61b2..c44c69c 100644 --- a/gcdu/cli.py +++ b/gcdu/cli.py @@ -12,5 +12,6 @@ def main(): """Utilities for Google Cloud Datastore.""" pass + main.add_command(export) main.add_command(import_cmd) diff --git a/gcdu/commands/export.py b/gcdu/commands/export.py index cdebdbe..2a176d0 100644 --- a/gcdu/commands/export.py +++ b/gcdu/commands/export.py @@ -1,9 +1,10 @@ # -*- coding: utf-8 -*- """The export command.""" import json + import click -from .utils import get_datastore_api, get_kinds_list, show_progressbar_item, partition_replace, save +from .utils import (get_datastore_api, partition_replace, save, execute_tasks) @click.command() @@ -19,29 +20,37 @@ default='./data', show_default=True) @click.option('--project-placeholder', '-pp', - help='Placeholder value to replace the project value in exported files.', + help='Placeholder value to replace the project value' + ' in exported files.', required=True, default='___PROJECT___', show_default=True) @click.option('--namespace-placeholder', '-np', - help='Placeholder value to replace the namespace value in exported files.', + help='Placeholder value to replace the namespace value' + ' in exported files.', required=True, default='___NAMESPACE___', show_default=True) @click.option('--kinds', '-k', help='Comma separated list of Datastore Kinds to export.', required=True) -def export(project, namespace, data_dir, project_placeholder, namespace_placeholder, kinds): +def export(project, namespace, data_dir, project_placeholder, + namespace_placeholder, kinds): """Export data from database.""" - kinds_list = get_kinds_list(kinds) - click.echo("Executing export. Project={}, Namespace={}, Kinds={}.".format(project, namespace, kinds_list)) - with click.progressbar(kinds_list, label='Exporting', show_eta=True, - item_show_func=show_progressbar_item) as bar: - for kind in bar: - execute_export(project, namespace, data_dir, project_placeholder, namespace_placeholder, kind) + execute_tasks({ + 'type_task': 'export', + 'project': project, + 'namespace': namespace, + 'data_dir': data_dir, + 'project_placeholder': project_placeholder, + 'namespace_placeholder': namespace_placeholder, + 'kinds': kinds, + 'target': execute_export + }) -def execute_export(project, namespace, data_dir, project_placeholder, namespace_placeholder, kind): +def execute_export(project, namespace, data_dir, project_placeholder, + namespace_placeholder, kind): datastore = get_datastore_api() request_body = { @@ -64,16 +73,16 @@ def execute_export(project, namespace, data_dir, project_placeholder, namespace_ entities = extract_entities(response) entities_json = json.dumps(entities) - entities_replaced_json = partition_replace(entities_json, project, project_placeholder, namespace, - namespace_placeholder) + entities_replaced_json = partition_replace(entities_json, project, + project_placeholder, namespace, + namespace_placeholder) entities_replaced = json.loads(entities_replaced_json) save(entities_replaced, kind, data_dir) def extract_entities(response): - entities = [] - for entityResult in response.get('batch').get('entityResults'): - entity = entityResult.get('entity') - entities.append(entity) - return entities + return [ + entityResult.get('entity') + for entityResult in response.get('batch', {}).get('entityResults', []) + ] diff --git a/gcdu/commands/import_cmd.py b/gcdu/commands/import_cmd.py index 561e472..4cb6bbd 100644 --- a/gcdu/commands/import_cmd.py +++ b/gcdu/commands/import_cmd.py @@ -1,9 +1,10 @@ # -*- coding: utf-8 -*- """The import command.""" import json + import click -from .utils import get_datastore_api, get_kinds_list, show_progressbar_item, partition_replace, load +from .utils import (get_datastore_api, partition_replace, load, execute_tasks) @click.command('import') @@ -19,43 +20,50 @@ default='./data', show_default=True) @click.option('--project-placeholder', '-pp', - help='Placeholder value to replace the project value in previously exported files.', + help='Placeholder value to replace the project value in' + ' previously exported files.', required=True, default='___PROJECT___', show_default=True) @click.option('--namespace-placeholder', '-np', - help='Placeholder value to replace the namespace value in previously exported files.', + help='Placeholder value to replace the namespace value in' + ' previously exported files.', required=True, default='___NAMESPACE___', show_default=True) @click.option('--kinds', '-k', help='Comma separated list of Datastore Kinds to import.', required=True) -def import_cmd(project, namespace, data_dir, project_placeholder, namespace_placeholder, kinds): +def import_cmd(project, namespace, data_dir, project_placeholder, + namespace_placeholder, kinds): """Import data to database using previously exported data as input.""" - kinds_list = get_kinds_list(kinds) - click.echo("Executing import. Project={}, Namespace={}, Kinds={}.".format(project, namespace, kinds_list)) - with click.progressbar(kinds_list, label='Importing', show_eta=True, - item_show_func=show_progressbar_item) as bar: - for kind in bar: - execute_import(project, namespace, data_dir, project_placeholder, namespace_placeholder, kind) + execute_tasks({ + 'type_task': 'import', + 'project': project, + 'namespace': namespace, + 'data_dir': data_dir, + 'project_placeholder': project_placeholder, + 'namespace_placeholder': namespace_placeholder, + 'kinds': kinds, + 'target': execute_import + }) -def execute_import(project, namespace, data_dir, project_placeholder, namespace_placeholder, kind): +def execute_import(project, namespace, data_dir, project_placeholder, + namespace_placeholder, kind): datastore = get_datastore_api() entities = load(kind, data_dir) entities_json = json.dumps(entities) - entities_replaced_json = partition_replace(entities_json, project_placeholder, project, - namespace_placeholder, namespace) + entities_replaced_json = partition_replace(entities_json, + project_placeholder, project, + namespace_placeholder, + namespace) entities_replaced = json.loads(entities_replaced_json) - inserts = [] - for entity in entities_replaced: - insert = { - 'insert': entity - } - inserts.append(insert) + inserts = [ + {'insert': entity} for entity in entities_replaced + ] request_body = { "mutations": inserts, diff --git a/gcdu/commands/utils.py b/gcdu/commands/utils.py index c1c6fe8..f06c7c6 100644 --- a/gcdu/commands/utils.py +++ b/gcdu/commands/utils.py @@ -1,10 +1,29 @@ # -*- coding: utf-8 -*- """Utilities.""" +import copy import io -import os import json +import os +import threading +from collections import namedtuple + +import click import googleapiclient.discovery +cls_task = namedtuple( + 'Task', + [ + 'type_task', + 'project', + 'namespace', + 'target', + 'data_dir', + 'project_placeholder', + 'namespace_placeholder', + 'kinds' + ] +) + def get_datastore_api(): return googleapiclient.discovery.build('datastore', 'v1') @@ -20,7 +39,8 @@ def show_progressbar_item(item): return "Kind: {}".format(item) -def partition_replace(entities_json, from_project, to_project, from_namespace, to_namespace): +def partition_replace(entities_json, from_project, to_project, from_namespace, + to_namespace): result = entities_json.replace('"projectId": "{}"'.format(from_project), '"projectId": "{}"'.format(to_project)) result = result.replace('"namespaceId": "{}"'.format(from_namespace), @@ -29,15 +49,61 @@ def partition_replace(entities_json, from_project, to_project, from_namespace, t def save(entities, kind, data_dir): + if not entities: + click.echo('No entities found for kind {}'.format(kind)) + return if not os.path.exists(data_dir): os.makedirs(data_dir) - with io.open('{}/{}.json'.format(data_dir, kind), 'w', encoding='utf-8') as export_file: + with io.open('{}/{}.json'.format(data_dir, kind), 'w', + encoding='utf-8') as export_file: export_file.write( - json.dumps(entities, ensure_ascii=False, sort_keys=True, indent=2, separators=(',', ': '))) + json.dumps(entities, ensure_ascii=False, sort_keys=True, indent=2, + separators=(u',', u': '))) def load(kind, data_dir): - with io.open('{}/{}.json'.format(data_dir, kind), 'r', encoding='utf-8') as export_file: + with io.open('{}/{}.json'.format(data_dir, kind), 'r', + encoding='utf-8') as export_file: entities = json.load(export_file) return entities + + +def execute_tasks(kargs): + data = cls_task(**kargs) + kinds_list = get_kinds_list(data.kinds) + click.echo( + "Executing {}. Project={}, Namespace={}, Kinds={}.".format( + data.type_task, + data.project, + data.namespace, + kinds_list)) + + tasks = {} + for kind in kinds_list: + tasks[kind] = threading.Thread( + target=data.target, + name=kind, + args=( + data.project, data.namespace, data.data_dir, + data.project_placeholder, + data.namespace_placeholder, kind + ) + ) + + kinds_list_progress = copy.deepcopy(kinds_list) + + click.echo('Starting tasks...') + for task in tasks: + tasks[task].start() + + click.echo('Done. {} tasks started.'.format(len(tasks))) + click.echo('Executing...') + + while kinds_list_progress: + for idx, kind in enumerate(kinds_list_progress): + if not tasks.get(kind).is_alive(): + click.echo('Done. Kind={}'.format(kind)) + kinds_list_progress.pop(idx) + + click.echo('Finished!')