diff --git a/.gitignore b/.gitignore index b56215e..25f7e44 100644 --- a/.gitignore +++ b/.gitignore @@ -398,3 +398,4 @@ $RECYCLE.BIN/ *.lnk # End of https://www.gitignore.io/api/node,java,linux,macos,maven,python,eclipse,windows,virtualenv,intellij+all +.vscode/ diff --git a/gcdu/__version__.py b/gcdu/__version__.py index 2cf0804..84874bc 100644 --- a/gcdu/__version__.py +++ b/gcdu/__version__.py @@ -8,4 +8,4 @@ # # Google Cloud Datastore Utils -__version__ = '0.0.4' +__version__ = '0.0.5' diff --git a/gcdu/commands/export.py b/gcdu/commands/export.py index 2a176d0..047f00a 100644 --- a/gcdu/commands/export.py +++ b/gcdu/commands/export.py @@ -45,12 +45,13 @@ def export(project, namespace, data_dir, project_placeholder, 'project_placeholder': project_placeholder, 'namespace_placeholder': namespace_placeholder, 'kinds': kinds, + 'chunk': None, 'target': execute_export }) def execute_export(project, namespace, data_dir, project_placeholder, - namespace_placeholder, kind): + namespace_placeholder, kind, chunk): datastore = get_datastore_api() request_body = { @@ -67,16 +68,27 @@ def execute_export(project, namespace, data_dir, project_placeholder, } } - response = datastore.projects() \ - .runQuery(projectId=project, body=request_body) \ - .execute() + entities_replaced = [] + more = True + + while more: + response = datastore.projects() \ + .runQuery(projectId=project, body=request_body) \ + .execute() - entities = extract_entities(response) - entities_json = json.dumps(entities) - entities_replaced_json = partition_replace(entities_json, project, - project_placeholder, namespace, - namespace_placeholder) - entities_replaced = json.loads(entities_replaced_json) + more = not (response['batch']['moreResults'] in ['NO_MORE_RESULTS']) + cursor = response['batch']['endCursor'] + + if more and cursor: + request_body['query']['startCursor'] = cursor + + entities = extract_entities(response) + entities_json = json.dumps(entities) + entities_replaced_json = partition_replace(entities_json, project, + project_placeholder, namespace, + namespace_placeholder) + + entities_replaced.extend(json.loads(entities_replaced_json)) save(entities_replaced, kind, data_dir) diff --git a/gcdu/commands/import_cmd.py b/gcdu/commands/import_cmd.py index 4cb6bbd..e5c6e0e 100644 --- a/gcdu/commands/import_cmd.py +++ b/gcdu/commands/import_cmd.py @@ -4,7 +4,7 @@ import click -from .utils import (get_datastore_api, partition_replace, load, execute_tasks) +from .utils import (get_datastore_api, partition_replace, load, execute_tasks, split_lists) @click.command('import') @@ -34,8 +34,12 @@ @click.option('--kinds', '-k', help='Comma separated list of Datastore Kinds to import.', required=True) +@click.option('--chunk', '-c', + help='A valid int number', + default=500, + required=False) def import_cmd(project, namespace, data_dir, project_placeholder, - namespace_placeholder, kinds): + namespace_placeholder, kinds, chunk): """Import data to database using previously exported data as input.""" execute_tasks({ 'type_task': 'import', @@ -45,12 +49,13 @@ def import_cmd(project, namespace, data_dir, project_placeholder, 'project_placeholder': project_placeholder, 'namespace_placeholder': namespace_placeholder, 'kinds': kinds, - 'target': execute_import + 'chunk': chunk, + 'target': execute_import, }) def execute_import(project, namespace, data_dir, project_placeholder, - namespace_placeholder, kind): + namespace_placeholder, kind, chunk): datastore = get_datastore_api() entities = load(kind, data_dir) @@ -61,15 +66,19 @@ def execute_import(project, namespace, data_dir, project_placeholder, namespace) entities_replaced = json.loads(entities_replaced_json) - inserts = [ - {'insert': entity} for entity in entities_replaced - ] + + _iter = split_lists(entities_replaced, chunk) + + for _next in _iter: + inserts = [ + {'insert': entity} for entity in _next + ] - request_body = { - "mutations": inserts, - "mode": "NON_TRANSACTIONAL" - } + request_body = { + "mutations": inserts, + "mode": "NON_TRANSACTIONAL" + } - datastore.projects() \ - .commit(projectId=project, body=request_body) \ - .execute() + datastore.projects() \ + .commit(projectId=project, body=request_body) \ + .execute() diff --git a/gcdu/commands/utils.py b/gcdu/commands/utils.py index f06c7c6..c133c5a 100644 --- a/gcdu/commands/utils.py +++ b/gcdu/commands/utils.py @@ -20,7 +20,8 @@ 'data_dir', 'project_placeholder', 'namespace_placeholder', - 'kinds' + 'kinds', + 'chunk' ] ) @@ -87,7 +88,7 @@ def execute_tasks(kargs): args=( data.project, data.namespace, data.data_dir, data.project_placeholder, - data.namespace_placeholder, kind + data.namespace_placeholder, kind, data.chunk ) ) @@ -107,3 +108,18 @@ def execute_tasks(kargs): kinds_list_progress.pop(idx) click.echo('Finished!') + + +def split_lists(iterable, chunk_size): + """Generate sequences of `chunk_size` elements from `iterable`.""" + iterable = iter(iterable) + while True: + chunk = [] + try: + for _ in range(chunk_size): + chunk.append(iterable.next()) + yield chunk + except StopIteration: + if chunk: + yield chunk + break \ No newline at end of file