From e6c797d737a8cb4be74e70d72d8eaa595d912332 Mon Sep 17 00:00:00 2001
From: Chaitanya-Endurance
Date: Tue, 2 Jan 2024 15:11:19 +0530
Subject: [PATCH 1/2] Added support for classification report

---
 mars/learn/metrics/_classification.py | 221 ++++++++++++++++++++++++++
 1 file changed, 221 insertions(+)

diff --git a/mars/learn/metrics/_classification.py b/mars/learn/metrics/_classification.py
index b0ffe5ba63..ef18f68a91 100644
--- a/mars/learn/metrics/_classification.py
+++ b/mars/learn/metrics/_classification.py
@@ -1473,3 +1473,224 @@ def fbeta_score(
         zero_division=zero_division,
     )
     return f
+
+
+def classification_report(
+    y_true,
+    y_pred,
+    *,
+    labels=None,
+    target_names=None,
+    sample_weight=None,
+    digits=2,
+    output_dict=False,
+    zero_division="warn",
+):
+    """Build a text report showing the main classification metrics.
+
+    Parameters
+    ----------
+    y_true : 1d array-like, or label indicator array / sparse matrix
+        Ground truth (correct) target values.
+
+    y_pred : 1d array-like, or label indicator array / sparse matrix
+        Estimated targets as returned by a classifier.
+
+    labels : array-like of shape (n_labels,), default=None
+        Optional list of label indices to include in the report.
+
+    target_names : array-like of shape (n_labels,), default=None
+        Optional display names matching the labels (same order).
+
+    sample_weight : array-like of shape (n_samples,), default=None
+        Sample weights.
+
+    digits : int, default=2
+        Number of digits for formatting output floating point values.
+        When ``output_dict`` is ``True``, this will be ignored and the
+        returned values will not be rounded.
+
+    output_dict : bool, default=False
+        If True, return output as dict.
+
+        .. versionadded:: 0.20
+
+    zero_division : {"warn", 0.0, 1.0, np.nan}, default="warn"
+        Sets the value to return when there is a zero division. If set to
+        "warn", this acts as 0, but warnings are also raised.
+
+        .. versionadded:: 1.3
+           `np.nan` option was added.
+
+    Returns
+    -------
+    report : str or dict
+        Text summary of the precision, recall, F1 score for each class.
+        Dictionary returned if output_dict is True. Dictionary has the
+        following structure::
+
+            {'label 1': {'precision':0.5,
+                         'recall':1.0,
+                         'f1-score':0.67,
+                         'support':1},
+             'label 2': { ... },
+              ...
+            }
+
+        The reported averages include macro average (averaging the unweighted
+        mean per label), weighted average (averaging the support-weighted mean
+        per label), and sample average (only for multilabel classification).
+        Micro average (averaging the total true positives, false negatives and
+        false positives) is only shown for multi-label or multi-class
+        with a subset of classes, because it corresponds to accuracy
+        otherwise and would be the same for all metrics.
+        See also :func:`precision_recall_fscore_support` for more details
+        on averages.
+
+        Note that in binary classification, recall of the positive class
+        is also known as "sensitivity"; recall of the negative class is
+        "specificity".
+
+    See Also
+    --------
+    precision_recall_fscore_support: Compute precision, recall, F-measure and
+        support for each class.
+    confusion_matrix: Compute confusion matrix to evaluate the accuracy of a
+        classification.
+    multilabel_confusion_matrix: Compute a confusion matrix for each class or sample.
+ + Examples + -------- + >>> from sklearn.metrics import classification_report + >>> y_true = [0, 1, 2, 2, 2] + >>> y_pred = [0, 0, 2, 2, 1] + >>> target_names = ['class 0', 'class 1', 'class 2'] + >>> print(classification_report(y_true, y_pred, target_names=target_names)) + precision recall f1-score support + + class 0 0.50 1.00 0.67 1 + class 1 0.00 0.00 0.00 1 + class 2 1.00 0.67 0.80 3 + + accuracy 0.60 5 + macro avg 0.50 0.56 0.49 5 + weighted avg 0.70 0.60 0.61 5 + + >>> y_pred = [1, 1, 0] + >>> y_true = [1, 1, 1] + >>> print(classification_report(y_true, y_pred, labels=[1, 2, 3])) + precision recall f1-score support + + 1 1.00 0.67 0.80 3 + 2 0.00 0.00 0.00 0 + 3 0.00 0.00 0.00 0 + + micro avg 1.00 0.67 0.80 3 + macro avg 0.33 0.22 0.27 3 + weighted avg 1.00 0.67 0.80 3 + + """ + + y_type, y_true, y_pred = _check_targets(y_true, y_pred) + + if labels is None: + labels = unique_labels(y_true, y_pred) + labels_given = False + else: + labels = np.asarray(labels) + labels_given = True + + # labelled micro average + micro_is_accuracy = (y_type == "multiclass" or y_type == "binary") and ( + not labels_given or (set(labels) == set(unique_labels(y_true, y_pred))) + ) + + if target_names is not None and len(labels) != len(target_names): + if labels_given: + warnings.warn( + "labels size, {0}, does not match size of target_names, {1}".format( + len(labels), len(target_names) + ) + ) + else: + raise ValueError( + "Number of classes, {0}, does not match size of " + "target_names, {1}. Try specifying the labels " + "parameter".format(len(labels), len(target_names)) + ) + if target_names is None: + target_names = ["%s" % l for l in labels] + + headers = ["precision", "recall", "f1-score", "support"] + # compute per-class results without averaging + p, r, f1, s = precision_recall_fscore_support( + y_true, + y_pred, + labels=labels, + average=None, + sample_weight=sample_weight, + zero_division=zero_division, + ) + rows = zip(target_names, p, r, f1, s) + + if y_type.startswith("multilabel"): + average_options = ("micro", "macro", "weighted", "samples") + else: + average_options = ("micro", "macro", "weighted") + + if output_dict: + report_dict = {label[0]: label[1:] for label in rows} + for label, scores in report_dict.items(): + report_dict[label] = dict(zip(headers, [float(i) for i in scores])) + else: + longest_last_line_heading = "weighted avg" + name_width = max(len(cn) for cn in target_names) + width = max(name_width, len(longest_last_line_heading), digits) + head_fmt = "{:>{width}s} " + " {:>9}" * len(headers) + report = head_fmt.format("", *headers, width=width) + report += "\n\n" + row_fmt = "{:>{width}s} " + " {:>9.{digits}f}" * 3 + " {:>9}\n" + for row in rows: + report += row_fmt.format(*row, width=width, digits=digits) + report += "\n" + + # compute all applicable averages + for average in average_options: + if average.startswith("micro") and micro_is_accuracy: + line_heading = "accuracy" + else: + line_heading = average + " avg" + + # compute averages with specified averaging method + avg_p, avg_r, avg_f1, _ = precision_recall_fscore_support( + y_true, + y_pred, + labels=labels, + average=average, + sample_weight=sample_weight, + zero_division=zero_division, + ) + avg = [avg_p, avg_r, avg_f1, np.sum(s)] + + if output_dict: + report_dict[line_heading] = dict(zip(headers, [float(i) for i in avg])) + else: + if line_heading == "accuracy": + row_fmt_accuracy = ( + "{:>{width}s} " + + " {:>9.{digits}}" * 2 + + " {:>9.{digits}f}" + + " {:>9}\n" + ) + report += row_fmt_accuracy.format( + 
line_heading, "", "", *avg[2:], width=width, digits=digits
+                )
+            else:
+                report += row_fmt.format(line_heading, *avg, width=width, digits=digits)
+
+    if output_dict:
+        if "accuracy" in report_dict.keys():
+            report_dict["accuracy"] = report_dict["accuracy"]["precision"]
+        return report_dict
+    else:
+        return report

From 049d9e85beee693d44c041502d45eb9ea757cfa6 Mon Sep 17 00:00:00 2001
From: Chaitanya-Endurance
Date: Tue, 2 Jan 2024 15:30:01 +0530
Subject: [PATCH 2/2] checks

---
 mars/_version.py | 155 +++++++++++-------
 mars/config.py | 2 +-
 mars/conftest.py | 12 +-
 mars/core/custom_log.py | 2 +-
 mars/core/entity/tests/test_utils.py | 9 +-
 mars/core/graph/builder/chunk.py | 4 +-
 mars/dataframe/arithmetic/core.py | 4 +-
 mars/dataframe/base/eval.py | 12 +-
 mars/dataframe/base/rechunk.py | 5 +-
 mars/dataframe/base/shift.py | 4 +-
 mars/dataframe/base/tests/test_base.py | 8 +-
 .../base/tests/test_base_execution.py | 37 ++---
 .../raydataset/tests/test_mldataset.py | 9 +-
 .../raydataset/tests/test_raydataset.py | 9 +-
 mars/dataframe/core.py | 13 +-
 mars/dataframe/datasource/date_range.py | 4 +-
 mars/dataframe/datasource/from_tensor.py | 8 +-
 mars/dataframe/datasource/read_csv.py | 2 +-
 mars/dataframe/datasource/read_raydataset.py | 3 +-
 .../tests/test_datasource_execution.py | 6 +-
 mars/dataframe/datastore/to_csv.py | 2 +-
 mars/dataframe/groupby/__init__.py | 4 +-
 mars/dataframe/groupby/aggregation.py | 56 +++----
 mars/dataframe/groupby/core.py | 15 +-
 mars/dataframe/groupby/tests/test_groupby.py | 4 +-
 .../groupby/tests/test_groupby_execution.py | 23 +--
 mars/dataframe/indexing/setitem.py | 8 +-
 .../dataframe/indexing/tests/test_indexing.py | 8 +-
 .../indexing/tests/test_indexing_execution.py | 17 +-
 mars/dataframe/indexing/where.py | 4 +-
 mars/dataframe/merge/concat.py | 4 +-
 .../merge/tests/test_merge_execution.py | 10 +-
 mars/dataframe/reduction/all.py | 8 +-
 mars/dataframe/reduction/any.py | 8 +-
 mars/dataframe/reduction/core.py | 13 +-
 mars/dataframe/reduction/kurtosis.py | 12 +-
 mars/dataframe/reduction/skew.py | 8 +-
 .../reduction/tests/test_reduction.py | 6 +-
 .../tests/test_reduction_execution.py | 5 +-
 mars/dataframe/reduction/var.py | 4 +-
 mars/dataframe/sort/psrs.py | 4 +-
 .../sort/tests/test_sort_execution.py | 5 +-
 mars/dataframe/statistics/corr.py | 8 +-
 mars/dataframe/tests/test_utils.py | 11 +-
 mars/dataframe/utils.py | 2 +-
 mars/dataframe/window/ewm/aggregation.py | 10 +-
 .../dataframe/window/expanding/aggregation.py | 6 +-
 mars/deploy/kubedl/client.py | 6 +-
 mars/deploy/kubedl/config.py | 4 +-
 mars/deploy/kubernetes/config.py | 23 +--
 mars/deploy/kubernetes/tests/test_config.py | 14 +-
 mars/deploy/oscar/local.py | 6 +-
 mars/deploy/oscar/ray.py | 8 +-
 mars/deploy/oscar/session.py | 13 +-
 .../tests/test_clean_up_and_restore_func.py | 12 +-
 mars/deploy/oscar/tests/test_local.py | 21 +--
 mars/deploy/oscar/tests/test_ray.py | 4 +-
 mars/deploy/oscar/tests/test_ray_client.py | 6 +-
 .../tests/test_ray_cluster_standalone.py | 14 +-
 .../oscar/tests/test_ray_dag_failover.py | 10 +-
 mars/deploy/oscar/tests/test_ray_dag_oscar.py | 4 +-
 .../oscar/tests/test_ray_fault_injection.py | 10 +-
 .../oscar/tests/test_ray_load_modules.py | 13 +-
 .../deploy/oscar/tests/test_ray_scheduling.py | 25 ++-
 mars/deploy/yarn/tests/test_config.py | 2 +-
mars/learn/cluster/_k_means_elkan_iter.py | 12 +- mars/learn/cluster/_k_means_lloyd_iter.py | 12 +- mars/learn/cluster/_kmeans.py | 4 +- mars/learn/cluster/tests/test_k_means.py | 5 +- .../contrib/lightgbm/tests/test_classifier.py | 5 +- mars/learn/contrib/statsmodels/predict.py | 9 +- mars/learn/contrib/xgboost/core.py | 5 +- mars/learn/contrib/xgboost/dmatrix.py | 4 +- mars/learn/contrib/xgboost/start_tracker.py | 4 +- mars/learn/decomposition/_base.py | 2 +- mars/learn/decomposition/_pca.py | 4 +- mars/learn/decomposition/tests/test_pca.py | 16 +- .../decomposition/tests/test_truncated_svd.py | 2 +- mars/learn/ensemble/_bagging.py | 16 +- mars/learn/ensemble/_blockwise.py | 5 +- mars/learn/ensemble/tests/test_bagging.py | 10 +- mars/learn/ensemble/tests/test_blockwise.py | 4 +- mars/learn/linear_model/_base.py | 7 +- mars/learn/metrics/_classification.py | 22 +-- mars/learn/metrics/_ranking.py | 2 +- mars/learn/metrics/_scorer.py | 6 +- mars/learn/metrics/pairwise/core.py | 5 +- .../tests/test_euclidean_distances.py | 8 +- mars/learn/neighbors/_faiss.py | 8 +- mars/learn/neighbors/tests/test_faiss.py | 20 +-- mars/learn/preprocessing/normalize.py | 4 +- mars/learn/proxima/simple_index/builder.py | 4 +- mars/learn/proxima/simple_index/searcher.py | 2 +- mars/learn/utils/core.py | 4 +- mars/lib/aio/lru.py | 13 +- mars/lib/bloom_filter.py | 24 +-- mars/lib/filesystem/tests/test_s3.py | 1 + mars/lib/sparse/__init__.py | 2 +- mars/lib/sparse/array.py | 2 +- mars/lib/sparse/tests/test_sparse.py | 24 +-- mars/lib/tblib/__init__.py | 125 ++++++++------ mars/lib/tblib/decorators.py | 4 +- mars/lib/tblib/pickling_support.py | 5 +- .../backends/prometheus/prometheus_metric.py | 5 +- .../column_pruning/column_pruning_rule.py | 4 +- .../tileable/tests/test_arithmetic_query.py | 6 +- mars/optimization/physical/numexpr.py | 10 +- mars/oscar/backends/communication/utils.py | 2 +- mars/oscar/backends/config.py | 4 +- mars/oscar/backends/mars/tests/test_debug.py | 4 +- mars/oscar/backends/message.pyi | 10 -- mars/oscar/backends/ray/communication.py | 8 +- mars/oscar/backends/ray/pool.py | 4 +- mars/oscar/tests/test_actorcaller.py | 5 +- mars/oscar/tests/test_batch.py | 3 +- mars/serialization/tests/test_serial.py | 3 +- mars/services/cluster/api/web.py | 9 +- mars/services/cluster/gather.py | 5 +- mars/services/cluster/tests/test_locator.py | 6 +- mars/services/cluster/tests/test_service.py | 5 +- mars/services/meta/api/oscar.py | 4 +- mars/services/meta/tests/test_api.py | 9 +- mars/services/meta/tests/test_service.py | 5 +- mars/services/scheduling/api/web.py | 5 +- .../scheduling/supervisor/autoscale.py | 2 +- .../supervisor/tests/test_speculation.py | 3 +- .../scheduling/worker/tests/test_execution.py | 11 +- mars/services/session/supervisor/core.py | 4 +- mars/services/session/tests/test_service.py | 10 +- mars/services/storage/api/web.py | 4 +- mars/services/storage/tests/test_service.py | 4 +- mars/services/storage/transfer.py | 2 +- mars/services/subtask/worker/runner.py | 3 +- mars/services/task/api/web.py | 10 +- mars/services/task/execution/api.py | 3 +- mars/services/task/execution/mars/executor.py | 4 +- mars/services/task/execution/ray/executor.py | 14 +- .../ray/tests/test_ray_execution_backend.py | 5 +- mars/services/task/supervisor/preprocessor.py | 9 +- mars/services/task/supervisor/processor.py | 13 +- mars/services/task/supervisor/task.py | 6 +- .../supervisor/tests/test_task_manager.py | 9 +- mars/services/tests/fault_injection_patch.py | 6 +- mars/storage/shared_memory.py | 1 + 
.../tests/test_arithmetic_execution.py | 2 +- mars/tensor/base/repeat.py | 4 +- mars/tensor/datasource/diag.py | 4 +- mars/tensor/linalg/cholesky.py | 10 +- mars/tensor/rechunk/core.py | 3 +- mars/tensor/reduction/var.py | 4 +- mars/tensor/spatial/distance/squareform.py | 4 +- mars/tensor/special/ellip_func_integrals.py | 1 + mars/tensor/special/tests/test_special.py | 6 +- .../special/tests/test_special_execution.py | 3 +- mars/tensor/statistics/bincount.py | 5 +- mars/tensor/statistics/histogram.py | 9 +- mars/tensor/stats/ks.py | 2 +- .../stats/tests/test_stats_execution.py | 40 ++--- mars/tensor/stats/ttest.py | 6 +- mars/tensor/ufunc/ufunc.py | 8 +- mars/tests/test_resource.py | 20 +-- mars/utils.py | 12 +- 162 files changed, 632 insertions(+), 929 deletions(-) diff --git a/mars/_version.py b/mars/_version.py index e3b8ae21dd..742480dba8 100644 --- a/mars/_version.py +++ b/mars/_version.py @@ -1,4 +1,3 @@ - # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build @@ -60,17 +59,18 @@ class NotThisMethod(Exception): def register_vcs_handler(vcs, method): # decorator """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f + return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) process = None @@ -86,10 +86,14 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, try: dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) + process = subprocess.Popen( + [command] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + **popen_kwargs, + ) break except OSError: e = sys.exc_info()[1] @@ -124,15 +128,21 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @@ -191,7 +201,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. 
We use # a heuristic: assume all version tags have a digit. The old git %d @@ -200,7 +210,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -208,24 +218,31 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] + r = ref[len(tag_prefix) :] # Filter out refs that exactly match prefix or that don't start # with a number once the prefix is stripped (mostly a concern # when prefix is '') - if not re.match(r'\d', r): + if not re.match(r"\d", r): continue if verbose: print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } @register_vcs_handler("git", "pieces_from_vcs") @@ -247,8 +264,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): env.pop("GIT_DIR", None) runner = functools.partial(runner, env=env) - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -256,10 +272,19 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, [ - "describe", "--tags", "--dirty", "--always", "--long", - "--match", f"{tag_prefix}[[:digit:]]*" - ], cwd=root) + describe_out, rc = runner( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + f"{tag_prefix}[[:digit:]]*", + ], + cwd=root, + ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -274,8 +299,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) # --abbrev-ref was added in git-1.6.3 if rc != 0 or branch_name is None: raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") @@ -315,17 +339,16 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] + git_describe = git_describe[: 
git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: # unparsable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces # tag @@ -334,10 +357,12 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] + pieces["closest-tag"] = full_tag[len(tag_prefix) :] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) @@ -386,8 +411,7 @@ def render_pep440(pieces): rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -416,8 +440,7 @@ def render_pep440_branch(pieces): rendered = "0" if pieces["branch"] != "master": rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -578,11 +601,13 @@ def render_git_describe_long(pieces): def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } if not style or style == "default": style = "pep440" # the default @@ -606,9 +631,13 @@ def render(pieces, style): else: raise ValueError("unknown style '%s'" % style) - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } def get_versions(): @@ -622,8 +651,7 @@ def get_versions(): verbose = cfg.verbose try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass @@ -632,13 +660,16 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. 
- for _ in cfg.versionfile_source.split('/'): + for _ in cfg.versionfile_source.split("/"): root = os.path.dirname(root) except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + } try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) @@ -652,6 +683,10 @@ def get_versions(): except NotThisMethod: pass - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } diff --git a/mars/config.py b/mars/config.py index c029d6b6c8..94947553aa 100644 --- a/mars/config.py +++ b/mars/config.py @@ -364,7 +364,7 @@ def validate(x): # the default chunk store size default_options.register_option( - "chunk_store_limit", 128 * 1024**2, validator=is_numeric + "chunk_store_limit", 128 * 1024 ** 2, validator=is_numeric ) default_options.register_option( "chunk_size", None, validator=any_validator(is_null, is_integer), serialize=True diff --git a/mars/conftest.py b/mars/conftest.py index 2a304ff61c..1ba22e0e69 100644 --- a/mars/conftest.py +++ b/mars/conftest.py @@ -55,7 +55,7 @@ def ray_start_regular_shared2(request): # pragma: no cover os.environ["RAY_kill_idle_workers_interval_ms"] = "0" param = getattr(request, "param", {}) num_cpus = param.get("num_cpus", 64) - total_memory_mb = num_cpus * 2 * 1024**2 + total_memory_mb = num_cpus * 2 * 1024 ** 2 try: try: job_config = ray.job_config.JobConfig(total_memory_mb=total_memory_mb) @@ -81,7 +81,7 @@ def _ray_start_regular(request): # pragma: no cover yield else: num_cpus = param.get("num_cpus", 64) - total_memory_mb = num_cpus * 2 * 1024**2 + total_memory_mb = num_cpus * 2 * 1024 ** 2 try: try: job_config = ray.job_config.JobConfig(total_memory_mb=total_memory_mb) @@ -117,12 +117,12 @@ def _ray_large_cluster(request): # pragma: no cover remote_nodes = [] for i in range(num_nodes): remote_nodes.append( - cluster.add_node(num_cpus=num_cpus, memory=num_cpus * 2 * 1024**3) + cluster.add_node(num_cpus=num_cpus, memory=num_cpus * 2 * 1024 ** 3) ) if len(remote_nodes) == 1: try: job_config = ray.job_config.JobConfig( - total_memory_mb=num_nodes * 32 * 1024**3 + total_memory_mb=num_nodes * 32 * 1024 ** 3 ) except TypeError: job_config = None @@ -153,10 +153,10 @@ async def ray_create_mars_cluster(request, check_router_cleaned): ray_config = _load_config() param = getattr(request, "param", {}) - supervisor_mem = param.get("supervisor_mem", 1 * 1024**3) + supervisor_mem = param.get("supervisor_mem", 1 * 1024 ** 3) worker_num = param.get("worker_num", 2) worker_cpu = param.get("worker_cpu", 2) - worker_mem = param.get("worker_mem", 256 * 1024**2) + worker_mem = param.get("worker_mem", 256 * 1024 ** 2) ray_config.update(param.get("config", {})) client = await new_cluster( supervisor_mem=supervisor_mem, diff --git a/mars/core/custom_log.py b/mars/core/custom_log.py index e0a8a6b79b..5c7b863feb 100644 --- a/mars/core/custom_log.py +++ b/mars/core/custom_log.py @@ -131,7 +131,7 @@ def fetch(self, offsets: List[int] = None, sizes: List[int] = None): offsets = self._chunk_op_key_to_offsets if sizes is None: - sizes = 1 * 1024**2 # 1M each time + sizes = 1 * 1024 ** 2 # 1M each time result: dict = 
self._session.fetch_tileable_op_logs( self._tileable_op_key, offsets=offsets, sizes=sizes diff --git a/mars/core/entity/tests/test_utils.py b/mars/core/entity/tests/test_utils.py index 381d991e9d..ce3bd55976 100644 --- a/mars/core/entity/tests/test_utils.py +++ b/mars/core/entity/tests/test_utils.py @@ -64,14 +64,7 @@ def test_recursive_tile_with_duplicated_submission(setup): raw = np.random.RandomState(0).rand(10) d1 = mt.tensor(raw, chunk_size=5) op = _TestOperandWithDuplicatedSubmission() - t = op.new_tensor( - [ - d1, - ], - dtype=d1.dtype, - shape=(10,), - order=d1.order, - ) + t = op.new_tensor([d1,], dtype=d1.dtype, shape=(10,), order=d1.order,) with pytest.raises(RuntimeError, match="submitted repeatedly"): t.execute(extra_config={"check_duplicated_submission": True}) diff --git a/mars/core/graph/builder/chunk.py b/mars/core/graph/builder/chunk.py index d6c81900a0..1f5c29b871 100644 --- a/mars/core/graph/builder/chunk.py +++ b/mars/core/graph/builder/chunk.py @@ -250,9 +250,7 @@ def _tile( self._add_nodes(chunk_graph, chunks, visited, tileable) def _gen_result_chunks( - self, - chunk_graph: ChunkGraph, - next_tileable_handlers: List[_TileableHandler], + self, chunk_graph: ChunkGraph, next_tileable_handlers: List[_TileableHandler], ): result_chunks = chunk_graph.result_chunks tileable_graph = self._tileable_graph diff --git a/mars/dataframe/arithmetic/core.py b/mars/dataframe/arithmetic/core.py index 1073811857..08ab1229fb 100644 --- a/mars/dataframe/arithmetic/core.py +++ b/mars/dataframe/arithmetic/core.py @@ -214,9 +214,7 @@ def _tile_scalar(cls, op): if chunk.ndim == 2: if lazy_chunk_meta: out_chunk = out_op.new_chunk( - [chunk], - shape=chunk.shape, - index=chunk.index, + [chunk], shape=chunk.shape, index=chunk.index, ) out_chunk._set_tileable_meta( tileable_key=df.key, diff --git a/mars/dataframe/base/eval.py b/mars/dataframe/base/eval.py index ce6f536192..75a7048229 100644 --- a/mars/dataframe/base/eval.py +++ b/mars/dataframe/base/eval.py @@ -331,10 +331,7 @@ def __call__(self, df, output_type, shape, dtypes): else: name, dtype = dtypes params = dict( - name=name, - dtype=dtype, - shape=shape, - index_value=new_index_value, + name=name, dtype=dtype, shape=shape, index_value=new_index_value, ) return self.new_tileable([df], **params) @@ -388,12 +385,7 @@ def tile(cls, op: "DataFrameEval"): if out_df.ndim == 1: new_nsplits = new_nsplits[:1] - params.update( - dict( - chunks=chunks, - nsplits=tuple(new_nsplits), - ) - ) + params.update(dict(chunks=chunks, nsplits=tuple(new_nsplits),)) return new_op.new_tileables([in_df], **params) @classmethod diff --git a/mars/dataframe/base/rechunk.py b/mars/dataframe/base/rechunk.py index f0c56ec403..4f69f3bed4 100644 --- a/mars/dataframe/base/rechunk.py +++ b/mars/dataframe/base/rechunk.py @@ -197,8 +197,5 @@ def rechunk(a: TileableType, chunk_size: chunk_size_type, reassign_worker=False) if chunk_size == a.nsplits: return a - op = DataFrameRechunk( - chunk_size=chunk_size, - reassign_worker=reassign_worker, - ) + op = DataFrameRechunk(chunk_size=chunk_size, reassign_worker=reassign_worker,) return op(a) diff --git a/mars/dataframe/base/shift.py b/mars/dataframe/base/shift.py index 6239c2d2ba..3bd02c6798 100644 --- a/mars/dataframe/base/shift.py +++ b/mars/dataframe/base/shift.py @@ -299,7 +299,9 @@ def _tile_series(cls, op): to_concats = [c] left = abs(op.periods) while left > 0 and 0 <= prev_i < inp.chunk_shape[0]: - prev_chunk = inp.cix[prev_i,] + prev_chunk = inp.cix[ + prev_i, + ] size = min(left, prev_chunk.shape[0]) left -= size 
prev_i = prev_i - 1 if inc else prev_i + 1 diff --git a/mars/dataframe/base/tests/test_base.py b/mars/dataframe/base/tests/test_base.py index 601951b0b6..469df94494 100644 --- a/mars/dataframe/base/tests/test_base.py +++ b/mars/dataframe/base/tests/test_base.py @@ -246,7 +246,7 @@ def test_rechunk(): def test_dataframe_apply(): cols = [chr(ord("A") + i) for i in range(10)] - df_raw = pd.DataFrame(dict((c, [i**2 for i in range(20)]) for c in cols)) + df_raw = pd.DataFrame(dict((c, [i ** 2 for i in range(20)]) for c in cols)) old_chunk_store_limit = options.chunk_store_limit try: @@ -368,7 +368,7 @@ def df_series_func_with_err(v): def test_series_apply(): idxes = [chr(ord("A") + i) for i in range(20)] - s_raw = pd.Series([i**2 for i in range(20)], index=idxes) + s_raw = pd.Series([i ** 2 for i in range(20)], index=idxes) series = from_pandas_series(s_raw, chunk_size=5) @@ -439,11 +439,11 @@ def apply_with_error(_): def test_transform(): cols = [chr(ord("A") + i) for i in range(10)] - df_raw = pd.DataFrame(dict((c, [i**2 for i in range(20)]) for c in cols)) + df_raw = pd.DataFrame(dict((c, [i ** 2 for i in range(20)]) for c in cols)) df = from_pandas_df(df_raw, chunk_size=5) idxes = [chr(ord("A") + i) for i in range(20)] - s_raw = pd.Series([i**2 for i in range(20)], index=idxes) + s_raw = pd.Series([i ** 2 for i in range(20)], index=idxes) series = from_pandas_series(s_raw, chunk_size=5) def rename_fn(f, new_name): diff --git a/mars/dataframe/base/tests/test_base_execution.py b/mars/dataframe/base/tests/test_base_execution.py index eb2d3994fa..04eb32d644 100644 --- a/mars/dataframe/base/tests/test_base_execution.py +++ b/mars/dataframe/base/tests/test_base_execution.py @@ -330,7 +330,7 @@ def test_describe_execution(setup): def test_data_frame_apply_execute(setup): cols = [chr(ord("A") + i) for i in range(10)] - df_raw = pd.DataFrame(dict((c, [i**2 for i in range(20)]) for c in cols)) + df_raw = pd.DataFrame(dict((c, [i ** 2 for i in range(20)]) for c in cols)) old_chunk_store_limit = options.chunk_store_limit try: @@ -407,11 +407,11 @@ def test_data_frame_apply_execute(setup): def test_data_frame_apply_closure_execute(setup): cols = [chr(ord("A") + i) for i in range(10)] - df_raw = pd.DataFrame(dict((c, [i**2 for i in range(20)]) for c in cols)) + df_raw = pd.DataFrame(dict((c, [i ** 2 for i in range(20)]) for c in cols)) df = from_pandas_df(df_raw, chunk_size=5) - x = pd.Series([i for i in range(10**4)]) - y = pd.Series([i for i in range(10**4)]) + x = pd.Series([i for i in range(10 ** 4)]) + y = pd.Series([i for i in range(10 ** 4)]) def closure(z): return pd.concat([x, y], ignore_index=True) @@ -425,15 +425,15 @@ def closure(z): @pytest.mark.parametrize("multiplier", [1, 3, 4]) def test_data_frame_apply_callable_execute(setup, multiplier): cols = [chr(ord("A") + i) for i in range(10)] - df_raw = pd.DataFrame(dict((c, [i**2 for i in range(20)]) for c in cols)) + df_raw = pd.DataFrame(dict((c, [i ** 2 for i in range(20)]) for c in cols)) df = from_pandas_df(df_raw, chunk_size=5) class callable_df: __slots__ = "x", "__dict__" def __init__(self, multiplier: int = 1): - self.x = pd.Series([i for i in range(10**multiplier)]) - self.y = pd.Series([i for i in range(10**multiplier)]) + self.x = pd.Series([i for i in range(10 ** multiplier)]) + self.y = pd.Series([i for i in range(10 ** multiplier)]) def __call__(self, pdf): return pd.concat([self.x, self.y], ignore_index=True) @@ -447,7 +447,7 @@ def __call__(self, pdf): def test_series_apply_execute(setup): idxes = [chr(ord("A") + i) for i in 
range(20)] - s_raw = pd.Series([i**2 for i in range(20)], index=idxes) + s_raw = pd.Series([i ** 2 for i in range(20)], index=idxes) series = from_pandas_series(s_raw, chunk_size=5) @@ -488,7 +488,7 @@ def test_series_apply_execute(setup): def test_series_apply_closure_execute(setup): idxes = [chr(ord("A") + i) for i in range(20)] - s_raw = pd.Series([i**2 for i in range(20)], index=idxes) + s_raw = pd.Series([i ** 2 for i in range(20)], index=idxes) series = from_pandas_series(s_raw, chunk_size=5) @@ -542,10 +542,10 @@ def test_apply_with_arrow_dtype_execution(setup): def test_transform_execute(setup): cols = [chr(ord("A") + i) for i in range(10)] - df_raw = pd.DataFrame(dict((c, [i**2 for i in range(20)]) for c in cols)) + df_raw = pd.DataFrame(dict((c, [i ** 2 for i in range(20)]) for c in cols)) idx_vals = [chr(ord("A") + i) for i in range(20)] - s_raw = pd.Series([i**2 for i in range(20)], index=idx_vals) + s_raw = pd.Series([i ** 2 for i in range(20)], index=idx_vals) def rename_fn(f, new_name): f.__name__ = new_name @@ -588,11 +588,7 @@ def f(s): pd.testing.assert_frame_equal(result, expected) fn_dict = OrderedDict( - [ - ("A", "cumsum"), - ("D", ["cumsum", "cummax"]), - ("F", lambda x: x + 1), - ] + [("A", "cumsum"), ("D", ["cumsum", "cummax"]), ("F", lambda x: x + 1),] ) r = df.transform(fn_dict) result = r.execute().fetch() @@ -2056,19 +2052,18 @@ def f2(pdf): assert not ("dtype" in res.data_params) assert res.shape == (4, 2) pd.testing.assert_frame_equal( - res.fetch(), - raw.iloc[[0, 2, 5, 7], :2], + res.fetch(), raw.iloc[[0, 2, 5, 7], :2], ) def test_map_chunk_closure_execute(setup): raw = pd.DataFrame( - np.random.randint(10**3, size=(10, 5)), columns=[f"col{i}" for i in range(5)] + np.random.randint(10 ** 3, size=(10, 5)), columns=[f"col{i}" for i in range(5)] ) df = from_pandas_df(raw, chunk_size=5) num = 1 - dic = {i: -i for i in range(10**3)} + dic = {i: -i for i in range(10 ** 3)} def f1(pdf): return pdf + num @@ -2093,7 +2088,7 @@ def f2(pdf): class callable_df: def __init__(self, multiplier: int = 1): - self.dic = {i: -i for i in range(10**multiplier)} + self.dic = {i: -i for i in range(10 ** multiplier)} def __call__(self, pdf): ret = pd.DataFrame(columns=["col1", "col2"]) diff --git a/mars/dataframe/contrib/raydataset/tests/test_mldataset.py b/mars/dataframe/contrib/raydataset/tests/test_mldataset.py index 2ea907b646..1618083a29 100644 --- a/mars/dataframe/contrib/raydataset/tests/test_mldataset.py +++ b/mars/dataframe/contrib/raydataset/tests/test_mldataset.py @@ -38,10 +38,10 @@ @pytest.fixture async def create_cluster(request): client = await new_cluster( - supervisor_mem=256 * 1024**2, + supervisor_mem=256 * 1024 ** 2, worker_num=2, worker_cpu=1, - worker_mem=256 * 1024**2, + worker_mem=256 * 1024 ** 2, backend=MARS_CI_BACKEND, ) async with client: @@ -126,10 +126,7 @@ async def test_mars_with_xgboost(ray_start_regular_shared, create_cluster): train_set = RayDMatrix(ds, "target") evals_result = {} bst = train( - { - "objective": "binary:logistic", - "eval_metric": ["logloss", "error"], - }, + {"objective": "binary:logistic", "eval_metric": ["logloss", "error"],}, train_set, evals_result=evals_result, evals=[(train_set, "train")], diff --git a/mars/dataframe/contrib/raydataset/tests/test_raydataset.py b/mars/dataframe/contrib/raydataset/tests/test_raydataset.py index adffdfa1e6..67d81bb102 100644 --- a/mars/dataframe/contrib/raydataset/tests/test_raydataset.py +++ b/mars/dataframe/contrib/raydataset/tests/test_raydataset.py @@ -40,10 +40,10 @@ @pytest.fixture 
async def create_cluster(request): client = await new_cluster( - supervisor_mem=256 * 1024**2, + supervisor_mem=256 * 1024 ** 2, worker_num=2, worker_cpu=1, - worker_mem=256 * 1024**2, + worker_mem=256 * 1024 ** 2, backend=MARS_CI_BACKEND, ) async with client: @@ -96,10 +96,7 @@ async def test_mars_with_xgboost(ray_start_regular_shared, create_cluster): train_set = RayDMatrix(ds, "target") evals_result = {} bst = train( - { - "objective": "binary:logistic", - "eval_metric": ["logloss", "error"], - }, + {"objective": "binary:logistic", "eval_metric": ["logloss", "error"],}, train_set, evals_result=evals_result, evals=[(train_set, "train")], diff --git a/mars/dataframe/core.py b/mars/dataframe/core.py index 3fe442c364..c88f5e6c5e 100644 --- a/mars/dataframe/core.py +++ b/mars/dataframe/core.py @@ -783,7 +783,7 @@ def _iter(self, batch_size=None, session=None, **kw): else: # if batch_size is not specified, use first batch to estimate # batch_size. - default_batch_bytes = 50 * 1024**2 + default_batch_bytes = 50 * 1024 ** 2 first_batch = 1000 size = self.shape[0] @@ -3116,19 +3116,12 @@ class DataFrameOrSeriesData(HasShapeTileableData, _ToPandasMixin): _data_params = DictField("data_params") def __init__( - self, - op=None, - chunks=None, - data_type=None, - data_params=None, - **kw, + self, op=None, chunks=None, data_type=None, data_params=None, **kw, ): self._data_type = data_type self._data_params = data_params or dict() super().__init__( - _op=op, - _chunks=chunks, - **kw, + _op=op, _chunks=chunks, **kw, ) def __getattr__(self, item): diff --git a/mars/dataframe/datasource/date_range.py b/mars/dataframe/datasource/date_range.py index fca3b45ca6..37a4b6fa4b 100644 --- a/mars/dataframe/datasource/date_range.py +++ b/mars/dataframe/datasource/date_range.py @@ -131,9 +131,7 @@ class DataFrameDateRange(DataFrameOperand, DataFrameOperandMixin): inclusive = StringField("inclusive") def __init__( - self, - output_types=None, - **kw, + self, output_types=None, **kw, ): super().__init__(_output_types=output_types, **kw) if self.output_types is None: diff --git a/mars/dataframe/datasource/from_tensor.py b/mars/dataframe/datasource/from_tensor.py index c8a7b8807b..1f0fa744d1 100644 --- a/mars/dataframe/datasource/from_tensor.py +++ b/mars/dataframe/datasource/from_tensor.py @@ -310,7 +310,9 @@ def _tile_input_1d_tileables(cls, op: "DataFrameFromTensor"): index_value = parse_index(pd_index, store_data=True) else: assert op.index is not None - index_chunk = in_tensors[-1].cix[i,] + index_chunk = in_tensors[-1].cix[ + i, + ] index_value = parse_index( pd.Index([], dtype=index_chunk.dtype), index_chunk, @@ -469,9 +471,7 @@ def execute(cls, ctx: Union[dict, Context], op: "DataFrameFromTensor"): if isinstance(index_data, pd.RangeIndex) and len(index_data) == 0: index_data = None ctx[chunk.key] = pd.DataFrame( - tensor_data, - index=index_data, - columns=op.columns, + tensor_data, index=index_data, columns=op.columns, ) else: index_data = ctx[op.index.key] diff --git a/mars/dataframe/datasource/read_csv.py b/mars/dataframe/datasource/read_csv.py index 1f39a5efce..ab433acbcf 100644 --- a/mars/dataframe/datasource/read_csv.py +++ b/mars/dataframe/datasource/read_csv.py @@ -52,7 +52,7 @@ cudf = lazy_import("cudf") -def _find_delimiter(f, block_size=2**16): +def _find_delimiter(f, block_size=2 ** 16): delimiter = b"\n" if f.tell() == 0: return 0 diff --git a/mars/dataframe/datasource/read_raydataset.py b/mars/dataframe/datasource/read_raydataset.py index 090abbaa03..da37aa37bb 100644 --- 
a/mars/dataframe/datasource/read_raydataset.py +++ b/mars/dataframe/datasource/read_raydataset.py @@ -155,8 +155,7 @@ def read_ray_dataset(ds, columns=None, incremental_index=False, **kwargs): @functools.wraps(read_ray_dataset) def read_raydataset(*args, **kwargs): warnings.warn( - "read_raydataset has been renamed to read_ray_dataset", - DeprecationWarning, + "read_raydataset has been renamed to read_ray_dataset", DeprecationWarning, ) return read_ray_dataset(*args, **kwargs) diff --git a/mars/dataframe/datasource/tests/test_datasource_execution.py b/mars/dataframe/datasource/tests/test_datasource_execution.py index 2bbb777085..9b5378819a 100644 --- a/mars/dataframe/datasource/tests/test_datasource_execution.py +++ b/mars/dataframe/datasource/tests/test_datasource_execution.py @@ -224,8 +224,7 @@ def test_series_from_tensor(setup): series = md.Series(mt.ones((10,), chunk_size=4)) pd.testing.assert_series_equal( - series.execute().fetch(), - pd.Series(np.ones(10)), + series.execute().fetch(), pd.Series(np.ones(10)), ) index_data = np.random.rand(10) @@ -1284,8 +1283,7 @@ def test_read_raydataset(ray_start_regular, ray_create_mars_cluster): ds3 = ray.data.from_arrow([pa.Table.from_pandas(pdf2) for _ in range(3)]) df3 = md.read_ray_dataset(ds3) pd.testing.assert_frame_equal( - df3.head(5).to_pandas(), - pdf2, + df3.head(5).to_pandas(), pdf2, ) diff --git a/mars/dataframe/datastore/to_csv.py b/mars/dataframe/datastore/to_csv.py index 9e7019e8fd..a76491a497 100644 --- a/mars/dataframe/datastore/to_csv.py +++ b/mars/dataframe/datastore/to_csv.py @@ -458,7 +458,7 @@ def execute(cls, ctx, op): rest = total_bytes while rest > 0: # at most 4M - write_bytes = min(4 * 1024**2, rest) + write_bytes = min(4 * 1024 ** 2, rest) f.write(b"\00" * write_bytes) rest -= write_bytes diff --git a/mars/dataframe/groupby/__init__.py b/mars/dataframe/groupby/__init__.py index 1719fbae80..cef216d782 100644 --- a/mars/dataframe/groupby/__init__.py +++ b/mars/dataframe/groupby/__init__.py @@ -30,9 +30,7 @@ def _install(): # Just for enabling custom agg function registration. # Therefore, del this immediately after import. 
- from .nunique import ( - DataFrameCustomGroupByNuniqueMixin, - ) + from .nunique import DataFrameCustomGroupByNuniqueMixin del DataFrameCustomGroupByNuniqueMixin diff --git a/mars/dataframe/groupby/aggregation.py b/mars/dataframe/groupby/aggregation.py index 9508aaa9fe..2970ef8b4e 100644 --- a/mars/dataframe/groupby/aggregation.py +++ b/mars/dataframe/groupby/aggregation.py @@ -140,10 +140,7 @@ def _group_kurt(x, *args, **kwargs): def build_mock_agg_result( - groupby: GROUPBY_TYPE, - groupby_params: Dict, - raw_func: Callable, - **raw_func_kw, + groupby: GROUPBY_TYPE, groupby_params: Dict, raw_func: Callable, **raw_func_kw, ): try: agg_result = groupby.op.build_mock_groupby().aggregate(raw_func, **raw_func_kw) @@ -490,7 +487,9 @@ def _gen_map_chunks( by = [] for v in map_op.groupby_params["by"]: if isinstance(v, ENTITY_TYPE): - by_chunk = v.cix[chunk.index[0],] + by_chunk = v.cix[ + chunk.index[0], + ] chunk_inputs.append(by_chunk) by.append(by_chunk) else: @@ -567,10 +566,7 @@ def _gen_pivot_chunk( sample_chunks: List[ChunkType], agg_chunk_len: int, ): - properties = dict( - by=op.groupby_params["by"], - gpu=op.is_gpu(), - ) + properties = dict(by=op.groupby_params["by"], gpu=op.is_gpu(),) # stage 2: gather and merge samples, choose and broadcast p-1 pivots kind = "quicksort" @@ -584,25 +580,18 @@ def _gen_pivot_chunk( ) concat_pivot_chunk = concat_pivot_op.new_chunk( - sample_chunks, - shape=(agg_chunk_len,), - dtype=np.dtype(object), + sample_chunks, shape=(agg_chunk_len,), dtype=np.dtype(object), ) return concat_pivot_chunk @classmethod def _sample_chunks( - cls, - op: "DataFrameGroupByAgg", - agg_chunks: List[ChunkType], + cls, op: "DataFrameGroupByAgg", agg_chunks: List[ChunkType], ): chunk_shape = len(agg_chunks) sampled_chunks = [] - properties = dict( - by=op.groupby_params["by"], - gpu=op.is_gpu(), - ) + properties = dict(by=op.groupby_params["by"], gpu=op.is_gpu(),) for i, chunk in enumerate(agg_chunks): kws = [] @@ -875,13 +864,7 @@ def _build_tree_and_shuffle_chunks( op, len(combined_chunks), ) - return cls._perform_shuffle( - op, - combined_chunks, - in_df, - out_df, - func_infos, - ) + return cls._perform_shuffle(op, combined_chunks, in_df, out_df, func_infos,) @classmethod def _tile_auto( @@ -1144,15 +1127,18 @@ def _execute_combine(cls, ctx, op: "DataFrameGroupByAgg"): in_data_dict = cls._pack_inputs(op.agg_funcs, in_data_tuple) combines = [] - for raw_input, ( - _input_key, - raw_func_name, - _map_func_name, - agg_func_name, - custom_reduction, - output_key, - _output_limit, - kwds, + for ( + raw_input, + ( + _input_key, + raw_func_name, + _map_func_name, + agg_func_name, + custom_reduction, + output_key, + _output_limit, + kwds, + ), ) in zip(ctx[op.inputs[0].key], op.agg_funcs): input_obj = in_data_dict[output_key] if agg_func_name == "custom_reduction": diff --git a/mars/dataframe/groupby/core.py b/mars/dataframe/groupby/core.py index 7cf4e43e0a..d1a3bcf788 100644 --- a/mars/dataframe/groupby/core.py +++ b/mars/dataframe/groupby/core.py @@ -277,7 +277,9 @@ def tile(cls, op): chunk_by = [] for k in by: if isinstance(k, SERIES_TYPE): - by_chunk = k.cix[chunk.index[0],] + by_chunk = k.cix[ + chunk.index[0], + ] chunk_by.append(by_chunk) chunk_inputs.append(by_chunk) else: @@ -285,9 +287,7 @@ def tile(cls, op): map_op._by = chunk_by map_chunks.append( map_op.new_chunk( - chunk_inputs, - shape=(np.nan, np.nan), - index=chunk.index, + chunk_inputs, shape=(np.nan, np.nan), index=chunk.index, ) ) @@ -417,10 +417,9 @@ def _take_index(src, f): filtered_by.append(v) 
filtered.append(_take_index(d, index_filter)) if deliver_by: - ctx[chunk.key, reducer_index] = ctx.get_current_chunk().index, ( - *filtered, - filtered_by, - deliver_by, + ctx[chunk.key, reducer_index] = ( + ctx.get_current_chunk().index, + (*filtered, filtered_by, deliver_by,), ) else: if isinstance(df, tuple): diff --git a/mars/dataframe/groupby/tests/test_groupby.py b/mars/dataframe/groupby/tests/test_groupby.py index 852949862d..b4fa750356 100644 --- a/mars/dataframe/groupby/tests/test_groupby.py +++ b/mars/dataframe/groupby/tests/test_groupby.py @@ -141,9 +141,7 @@ def test_groupby_agg(): agg_chunk = chunk.inputs[0].inputs[0].inputs[0].inputs[0] assert agg_chunk.op.stage == OperandStage.map - r = mdf.groupby( - "c2", - ).sum(method="shuffle") + r = mdf.groupby("c2",).sum(method="shuffle") assert isinstance(r.op, DataFrameGroupByAgg) assert isinstance(r, DataFrame) diff --git a/mars/dataframe/groupby/tests/test_groupby_execution.py b/mars/dataframe/groupby/tests/test_groupby_execution.py index b2b1fb8c28..c50423df37 100644 --- a/mars/dataframe/groupby/tests/test_groupby_execution.py +++ b/mars/dataframe/groupby/tests/test_groupby_execution.py @@ -513,14 +513,12 @@ def test_dataframe_groupby_agg_sort(setup): continue r = mdf.groupby("c2").agg(agg_fun, method=method) pd.testing.assert_frame_equal( - r.execute().fetch(), - raw.groupby("c2").agg(agg_fun), + r.execute().fetch(), raw.groupby("c2").agg(agg_fun), ) r = mdf.groupby("c2").agg(agg_funs, method=method) pd.testing.assert_frame_equal( - r.execute().fetch(), - raw.groupby("c2").agg(agg_funs), + r.execute().fetch(), raw.groupby("c2").agg(agg_funs), ) agg = OrderedDict([("c1", ["min", "mean"]), ("c3", "std")]) @@ -533,14 +531,12 @@ def test_dataframe_groupby_agg_sort(setup): r = mdf.groupby("c2").agg({"c1": "min", "c3": "min"}, method=method) pd.testing.assert_frame_equal( - r.execute().fetch(), - raw.groupby("c2").agg({"c1": "min", "c3": "min"}), + r.execute().fetch(), raw.groupby("c2").agg({"c1": "min", "c3": "min"}), ) r = mdf.groupby("c2").agg({"c1": "min"}, method=method) pd.testing.assert_frame_equal( - r.execute().fetch(), - raw.groupby("c2").agg({"c1": "min"}), + r.execute().fetch(), raw.groupby("c2").agg({"c1": "min"}), ) # test groupby series @@ -675,11 +671,7 @@ def _disallow_combine_and_agg(ctx, op): with option_context({"chunk_store_limit": 1}): raw2 = pd.DataFrame( - { - "c1": rs.randint(20, size=100), - "c2": rs.rand(100), - "c3": rs.rand(100), - } + {"c1": rs.randint(20, size=100), "c2": rs.rand(100), "c3": rs.rand(100),} ) mdf = md.DataFrame(raw2, chunk_size=20) r = mdf.groupby("c3").agg("min") @@ -714,7 +706,7 @@ def test_distributed_groupby_agg(setup_cluster): rs = np.random.RandomState(0) raw = pd.DataFrame(rs.rand(50000, 10)) df = md.DataFrame(raw, chunk_size=raw.shape[0] // 2) - with option_context({"chunk_store_limit": 1024**2}): + with option_context({"chunk_store_limit": 1024 ** 2}): r = df.groupby(0).sum(combine_size=1) result = r.execute().fetch() pd.testing.assert_frame_equal(result, raw.groupby(0).sum()) @@ -1062,8 +1054,7 @@ def f(df): r = mdf2.groupby("c").transform(f, skip_infer=True) pd.testing.assert_frame_equal( - r.execute().fetch().sort_index(), - df2.groupby("c").transform(f).sort_index(), + r.execute().fetch().sort_index(), df2.groupby("c").transform(f).sort_index(), ) if pd.__version__ != "1.1.0": diff --git a/mars/dataframe/indexing/setitem.py b/mars/dataframe/indexing/setitem.py index 234d7a005d..826c03d180 100644 --- a/mars/dataframe/indexing/setitem.py +++ b/mars/dataframe/indexing/setitem.py 
@@ -268,7 +268,9 @@ def tile(cls, op: "DataFrameSetitem"): value_chunks, shape=shape, dtypes=dtypes ) else: - value_chunk = value.cix[c.index[0],] + value_chunk = value.cix[ + c.index[0], + ] chunk_inputs = [c, value_chunk] @@ -278,9 +280,7 @@ def tile(cls, op: "DataFrameSetitem"): shape = (shape[0], shape[1] + len(append_cols)) result_chunk = chunk_op.new_chunk( - chunk_inputs, - shape=shape, - index=c.index, + chunk_inputs, shape=shape, index=c.index, ) result_chunk._set_tileable_meta( tileable_key=out.key, diff --git a/mars/dataframe/indexing/tests/test_indexing.py b/mars/dataframe/indexing/tests/test_indexing.py index d1e037b999..2165c3a584 100644 --- a/mars/dataframe/indexing/tests/test_indexing.py +++ b/mars/dataframe/indexing/tests/test_indexing.py @@ -432,9 +432,7 @@ def test_iloc_setitem(): assert series.chunks[1].op.value == 2 raw = pd.DataFrame( - np.random.rand(9, 2), - index=["a1", "a2", "a3"] * 3, - columns=["x", "y"], + np.random.rand(9, 2), index=["a1", "a2", "a3"] * 3, columns=["x", "y"], ) df = md.DataFrame(raw, chunk_size=4) iloc_df = df.iloc[:, 1:] @@ -635,9 +633,7 @@ def test_dataframe_loc(): # test loc chunk's index_value raw = pd.DataFrame( - np.random.rand(9, 2), - index=["a1", "a2", "a3"] * 3, - columns=["x", "y"], + np.random.rand(9, 2), index=["a1", "a2", "a3"] * 3, columns=["x", "y"], ) df = md.DataFrame(raw, chunk_size=4) loc_df = df.loc[:, ["x"]] diff --git a/mars/dataframe/indexing/tests/test_indexing_execution.py b/mars/dataframe/indexing/tests/test_indexing_execution.py index 74a5693c0c..b170de62ff 100644 --- a/mars/dataframe/indexing/tests/test_indexing_execution.py +++ b/mars/dataframe/indexing/tests/test_indexing_execution.py @@ -695,8 +695,7 @@ def test_setitem(setup): result = df.execute().fetch() if not _allow_set_missing_list: expected = data.copy().reindex( - ["c" + str(i) for i in range(5)] + ["c10", "c11", "c12"], - axis=1, + ["c" + str(i) for i in range(5)] + ["c10", "c11", "c12"], axis=1, ) else: expected = data.copy() @@ -845,15 +844,9 @@ def test_reset_index_execution(setup): expected = (data1 + data2).reset_index() np.testing.assert_array_equal(result.to_numpy(), expected.to_numpy()) - data1 = pd.Series( - np.random.rand(10), - index=[0, 10, 2, 3, 4, 5, 6, 7, 8, 9], - ) + data1 = pd.Series(np.random.rand(10), index=[0, 10, 2, 3, 4, 5, 6, 7, 8, 9],) series1 = md.Series(data1, chunk_size=3) - data2 = pd.Series( - np.random.rand(10), - index=[11, 1, 2, 5, 7, 6, 8, 9, 10, 3], - ) + data2 = pd.Series(np.random.rand(10), index=[11, 1, 2, 5, 7, 6, 8, 9, 10, 3],) series2 = md.Series(data2, chunk_size=3) df = (series1 + series2).reset_index(incremental_index=True) result = df.execute().fetch() @@ -921,8 +914,8 @@ def test_rename(setup): r = series.rename("new_series") pd.testing.assert_series_equal(r.execute().fetch(), raw.rename("new_series")) - r = series.rename(lambda x: 2**x) - pd.testing.assert_series_equal(r.execute().fetch(), raw.rename(lambda x: 2**x)) + r = series.rename(lambda x: 2 ** x) + pd.testing.assert_series_equal(r.execute().fetch(), raw.rename(lambda x: 2 ** x)) with pytest.raises(TypeError): series.name = {1: 10, 2: 20} diff --git a/mars/dataframe/indexing/where.py b/mars/dataframe/indexing/where.py index 94931947df..0cde782a97 100644 --- a/mars/dataframe/indexing/where.py +++ b/mars/dataframe/indexing/where.py @@ -174,7 +174,9 @@ def get_tiled_chunk(obj, index, axis=None): return obj.cix[index[0], index[1]] elif isinstance(obj, SERIES_TYPE): axis = axis if axis is not None else op.axis - return obj.cix[index[axis],] + return 
obj.cix[ + index[axis], + ] else: return obj diff --git a/mars/dataframe/merge/concat.py b/mars/dataframe/merge/concat.py index b3ccc5c921..9867073e59 100644 --- a/mars/dataframe/merge/concat.py +++ b/mars/dataframe/merge/concat.py @@ -170,9 +170,7 @@ def _tile_series(cls, op: "DataFrameConcat"): index = (c.index[0], cum_index) shape = (c.shape[0], 1) to_frame_op = DataFrameFromTensor( - input=c, - index=None, - columns=None, + input=c, index=None, columns=None, ) if c.name: dtypes = pd.Series([c.dtype], index=[c.name]) diff --git a/mars/dataframe/merge/tests/test_merge_execution.py b/mars/dataframe/merge/tests/test_merge_execution.py index 62281b7199..e0a34c578c 100644 --- a/mars/dataframe/merge/tests/test_merge_execution.py +++ b/mars/dataframe/merge/tests/test_merge_execution.py @@ -390,18 +390,12 @@ def test_broadcast_merge(setup): ns = np.random.RandomState(0) # small dataframe raw1 = pd.DataFrame( - { - "key": ns.randint(0, 10, size=10), - "value": np.arange(10), - }, + {"key": ns.randint(0, 10, size=10), "value": np.arange(10),}, index=[f"a{i}" for i in range(10)], ) # big dataframe raw2 = pd.DataFrame( - { - "key": ns.randint(0, 100, size=100), - "value": np.arange(100, 200), - }, + {"key": ns.randint(0, 100, size=100), "value": np.arange(100, 200),}, index=[f"a{i}" for i in range(100)], ) diff --git a/mars/dataframe/reduction/all.py b/mars/dataframe/reduction/all.py index 6b575187be..c09ab858a7 100644 --- a/mars/dataframe/reduction/all.py +++ b/mars/dataframe/reduction/all.py @@ -100,13 +100,7 @@ def all_series( def all_dataframe( - df, - axis=0, - bool_only=None, - skipna=True, - level=None, - combine_size=None, - method=None, + df, axis=0, bool_only=None, skipna=True, level=None, combine_size=None, method=None, ): use_inf_as_na = options.dataframe.mode.use_inf_as_na output_types = [OutputType.series] if axis is not None else [OutputType.scalar] diff --git a/mars/dataframe/reduction/any.py b/mars/dataframe/reduction/any.py index da7a670a67..3bb0516a95 100644 --- a/mars/dataframe/reduction/any.py +++ b/mars/dataframe/reduction/any.py @@ -100,13 +100,7 @@ def any_series( def any_dataframe( - df, - axis=0, - bool_only=None, - skipna=True, - level=None, - combine_size=None, - method=None, + df, axis=0, bool_only=None, skipna=True, level=None, combine_size=None, method=None, ): use_inf_as_na = options.dataframe.mode.use_inf_as_na output_types = [OutputType.series] if axis is not None else [OutputType.scalar] diff --git a/mars/dataframe/reduction/core.py b/mars/dataframe/reduction/core.py index 69f01a7a2d..050cea42f6 100644 --- a/mars/dataframe/reduction/core.py +++ b/mars/dataframe/reduction/core.py @@ -215,12 +215,7 @@ def _default_agg_fun(value, func_name=None, **kw): @functools.lru_cache(100) def _get_series_reduction_dtype( - dtype, - func_name, - axis=None, - bool_only=False, - skipna=True, - numeric_only=False, + dtype, func_name, axis=None, bool_only=False, skipna=True, numeric_only=False, ): test_series = build_series(dtype=dtype, ensure_string=True) if func_name == "count": @@ -1239,11 +1234,7 @@ def compile(self) -> ReductionSteps: post_funcs.append( ReductionPostStep( - step.input_keys, - step.output_key, - func_name, - post_cols, - step.func, + step.input_keys, step.output_key, func_name, post_cols, step.func, ) ) diff --git a/mars/dataframe/reduction/kurtosis.py b/mars/dataframe/reduction/kurtosis.py index df3678aac9..2aa9685f51 100644 --- a/mars/dataframe/reduction/kurtosis.py +++ b/mars/dataframe/reduction/kurtosis.py @@ -49,20 +49,20 @@ def kurt(x): cnt = x.count() 
             mean = x.mean(skipna=skipna)
             divided = (
-                (x**4).mean(skipna=skipna)
-                - 4 * (x**3).mean(skipna=skipna) * mean
-                + 6 * (x**2).mean(skipna=skipna) * mean**2
-                - 3 * mean**4
+                (x ** 4).mean(skipna=skipna)
+                - 4 * (x ** 3).mean(skipna=skipna) * mean
+                + 6 * (x ** 2).mean(skipna=skipna) * mean ** 2
+                - 3 * mean ** 4
             )
             var = x.var(skipna=skipna, ddof=0)
             if isinstance(var, ENTITY_TYPE) or var > 0:
-                val = where_function(var > 0, divided / var**2, np.nan)
+                val = where_function(var > 0, divided / var ** 2, np.nan)
             else:
                 val = np.nan
             if not bias:
                 val = where_function(
                     (var > 0) & (cnt > 3),
-                    (val * (cnt**2 - 1) - 3 * (cnt - 1) ** 2) / (cnt - 2) / (cnt - 3),
+                    (val * (cnt ** 2 - 1) - 3 * (cnt - 1) ** 2) / (cnt - 2) / (cnt - 3),
                     np.nan,
                 )
             if not fisher:
diff --git a/mars/dataframe/reduction/skew.py b/mars/dataframe/reduction/skew.py
index 2673eb3ed9..07951702dd 100644
--- a/mars/dataframe/reduction/skew.py
+++ b/mars/dataframe/reduction/skew.py
@@ -44,13 +44,13 @@ def skew(x):
             cnt = x.count()
             mean = x.mean(skipna=skipna)
             divided = (
-                (x**3).mean(skipna=skipna)
-                - 3 * (x**2).mean(skipna=skipna) * mean
-                + 2 * mean**3
+                (x ** 3).mean(skipna=skipna)
-                - 3 * (x ** 2).mean(skipna=skipna) * mean
+                + 2 * mean ** 3
             )
             var = x.var(skipna=skipna, ddof=0)
             if isinstance(var, ENTITY_TYPE) or var > 0:
-                val = where_function(var > 0, divided / var**1.5, np.nan)
+                val = where_function(var > 0, divided / var ** 1.5, np.nan)
             else:
                 val = np.nan
             if not bias:
diff --git a/mars/dataframe/reduction/tests/test_reduction.py b/mars/dataframe/reduction/tests/test_reduction.py
index 53fbdb9458..19543f146b 100644
--- a/mars/dataframe/reduction/tests/test_reduction.py
+++ b/mars/dataframe/reduction/tests/test_reduction.py
@@ -469,7 +469,7 @@ def test_compile_function():
     # test agg for all data
     for ndim in [1, 2]:
         compiler = ReductionCompiler(store_source=True)
-        compiler.add_function(lambda x: (x**2).count() + 1, ndim=ndim)
+        compiler.add_function(lambda x: (x ** 2).count() + 1, ndim=ndim)
         result = compiler.compile()
         # check pre_funcs
         assert len(result.pre_funcs) == 1
@@ -484,7 +484,7 @@ def test_compile_function():
         assert "add" in result.post_funcs[0].func.__source__
 
         compiler.add_function(
-            lambda x: -x.prod() ** 2 + (1 + (x**2).count()), ndim=ndim
+            lambda x: -x.prod() ** 2 + (1 + (x ** 2).count()), ndim=ndim
         )
         result = compiler.compile()
         # check pre_funcs
@@ -595,7 +595,7 @@ def agg(self, v1):
 
     class MockReduction2(CustomReduction):
         def pre(self, value):
-            return value + 1, value**2
+            return value + 1, value ** 2
 
         def agg(self, v1, v2):
             return v1.sum(), v2.prod()
diff --git a/mars/dataframe/reduction/tests/test_reduction_execution.py b/mars/dataframe/reduction/tests/test_reduction_execution.py
index 4ba305c86c..ed92e1c7b7 100644
--- a/mars/dataframe/reduction/tests/test_reduction_execution.py
+++ b/mars/dataframe/reduction/tests/test_reduction_execution.py
@@ -919,8 +919,7 @@ def test_dataframe_aggregate(setup, check_ref_counts):
 
     result = df.agg({0: [sum, min, max]})
     pd.testing.assert_frame_equal(
-        result.execute().fetch(),
-        data.agg({0: [sum, min, max]}),
+        result.execute().fetch(), data.agg({0: [sum, min, max]}),
     )
 
     if _support_kw_agg:
@@ -1015,7 +1014,7 @@ def agg(self, v1):
 
     class MockReduction2(CustomReduction):
         def pre(self, value):
-            return value + 1, value**2
+            return value + 1, value ** 2
 
         def agg(self, v1, v2):
             return v1.sum(), v2.prod()
diff --git a/mars/dataframe/reduction/var.py b/mars/dataframe/reduction/var.py
index 319d7dd999..4001278447 100644
--- a/mars/dataframe/reduction/var.py
+++ b/mars/dataframe/reduction/var.py
@@ -39,8 +39,8 @@ def get_reduction_callable(cls, op):
         def var(x):
             cnt = x.count()
             if ddof == 0:
-                return (x**2).mean(skipna=skipna) - (x.mean(skipna=skipna)) ** 2
-            return ((x**2).sum(skipna=skipna) - x.sum(skipna=skipna) ** 2 / cnt) / (
+                return (x ** 2).mean(skipna=skipna) - (x.mean(skipna=skipna)) ** 2
+            return ((x ** 2).sum(skipna=skipna) - x.sum(skipna=skipna) ** 2 / cnt) / (
                 cnt - ddof
             )
 
diff --git a/mars/dataframe/sort/psrs.py b/mars/dataframe/sort/psrs.py
index 04ccc26cf4..eaa7b7618a 100644
--- a/mars/dataframe/sort/psrs.py
+++ b/mars/dataframe/sort/psrs.py
@@ -183,9 +183,7 @@ def concat_and_pivot(
         )
         concat_pivot_index = out_idx[: op.axis] + (0,) + out_idx[op.axis :]
         concat_pivot_chunk = concat_pivot_op.new_chunk(
-            sampled_chunks,
-            shape=concat_pivot_shape,
-            index=concat_pivot_index,
+            sampled_chunks, shape=concat_pivot_shape, index=concat_pivot_index,
         )
 
         return concat_pivot_chunk
diff --git a/mars/dataframe/sort/tests/test_sort_execution.py b/mars/dataframe/sort/tests/test_sort_execution.py
index d7b6fbe9d5..34c0430e0a 100644
--- a/mars/dataframe/sort/tests/test_sort_execution.py
+++ b/mars/dataframe/sort/tests/test_sort_execution.py
@@ -412,10 +412,7 @@ def test_gpu_execution(setup_gpu):
     pd.testing.assert_frame_equal(result.to_pandas(), expected)
 
     # test Series.sort_index
-    raw = pd.Series(
-        np.random.rand(10),
-        index=np.random.rand(10),
-    )
+    raw = pd.Series(np.random.rand(10), index=np.random.rand(10),)
     series = Series(raw).to_gpu()
 
     result = series.sort_index().execute().fetch()
diff --git a/mars/dataframe/statistics/corr.py b/mars/dataframe/statistics/corr.py
index 2de1f5b68f..5394027545 100644
--- a/mars/dataframe/statistics/corr.py
+++ b/mars/dataframe/statistics/corr.py
@@ -118,8 +118,8 @@ def _tile_pearson_cross(left, right, min_periods):
         sum_left = left_tensor.T.dot(nna_right)
         sum_right = right_tensor.T.dot(nna_left)
 
-        sum_left2 = (left_tensor.T**2).dot(nna_right)
-        sum_right2 = (right_tensor.T**2).dot(nna_left)
+        sum_left2 = (left_tensor.T ** 2).dot(nna_right)
+        sum_right2 = (right_tensor.T ** 2).dot(nna_left)
 
         sum_mul = left_tensor.T.dot(right_tensor)
         data_count = nna_left.T.dot(nna_right)
@@ -148,8 +148,8 @@ def _tile_pearson_align(cls, left, right, axis):
         sum_left = left.mul(nna_right, axis=axis).sum(axis=axis)
         sum_right = nna_left.mul(right, axis=axis).sum(axis=axis)
 
-        sum_left2 = (left**2).mul(nna_right, axis=axis).sum(axis=axis)
-        sum_right2 = nna_left.mul(right**2, axis=axis).sum(axis=axis)
+        sum_left2 = (left ** 2).mul(nna_right, axis=axis).sum(axis=axis)
+        sum_right2 = nna_left.mul(right ** 2, axis=axis).sum(axis=axis)
 
         sum_mul = left.mul(right, axis=axis).sum(axis=axis)
         data_count = nna_left.mul(nna_right, axis=axis).sum(axis=axis)
diff --git a/mars/dataframe/tests/test_utils.py b/mars/dataframe/tests/test_utils.py
index ec003e4269..5f08810a9d 100644
--- a/mars/dataframe/tests/test_utils.py
+++ b/mars/dataframe/tests/test_utils.py
@@ -562,10 +562,7 @@ def test_make_dtypes():
 
 @pytest.mark.parametrize(
     "columns",
-    [
-        pd.RangeIndex(8),
-        pd.MultiIndex.from_product([list("AB"), list("CDEF")]),
-    ],
+    [pd.RangeIndex(8), pd.MultiIndex.from_product([list("AB"), list("CDEF")]),],
 )
 def test_build_concatenated_rows_frame(setup, columns):
     df = pd.DataFrame(np.random.rand(16, 8), columns=columns)
@@ -649,7 +646,7 @@ def get_chunks_meta(self, data_keys: List[str], **_) -> List[Dict]:
 
 @pytest.mark.parametrize("multiplier_and_expected", [(1, False), (3, True), (4, True)])
 def test_whether_to_clean_up(multiplier_and_expected):
-    threshold = 10**4
+    threshold = 10 ** 4
     multiplier, expected = multiplier_and_expected
 
     class FakeOperandwithClosure:
@@ -676,9 +673,9 @@ def __init__(self, multiplier):
                     bytearray("This is a byte array.", "utf-8"),
                 ],
             ]
-            self.dic = {"one": pd.Series([i for i in range(10**multiplier)])}
+            self.dic = {"one": pd.Series([i for i in range(10 ** multiplier)])}
             self.df = pd.DataFrame(self.dic)
-            self.ds = pd.Series([i for i in range(10**multiplier)])
+            self.ds = pd.Series([i for i in range(10 ** multiplier)])
 
         def __call__(self, z):
             pass
diff --git a/mars/dataframe/utils.py b/mars/dataframe/utils.py
index 9df187bb6a..649f9c443a 100644
--- a/mars/dataframe/utils.py
+++ b/mars/dataframe/utils.py
@@ -1450,7 +1450,7 @@ def _concat_chunks(merge_chunks: List[ChunkType], output_index: int):
 # removed or refactored in the future to calculate func size
 # with more accuracy as well as address some serialization issues.
 def clean_up_func(op):
-    threshold = int(os.getenv("MARS_CLOSURE_CLEAN_UP_BYTES_THRESHOLD", 10**4))
+    threshold = int(os.getenv("MARS_CLOSURE_CLEAN_UP_BYTES_THRESHOLD", 10 ** 4))
     if threshold == -1:  # pragma: no cover
         return
     ctx = get_context()
diff --git a/mars/dataframe/window/ewm/aggregation.py b/mars/dataframe/window/ewm/aggregation.py
index d46ce10d60..90f41a1e0f 100644
--- a/mars/dataframe/window/ewm/aggregation.py
+++ b/mars/dataframe/window/ewm/aggregation.py
@@ -58,7 +58,7 @@ def _add_pred_results(
     new_locals = []
    combine_axis = pred_results[0].ndim - axis - 1
     weight = (1 - alpha) ** order
-    pred_coeff = weight**pred_exponent
+    pred_coeff = weight ** pred_exponent
     for idx, (pred_result, local_result) in enumerate(zip(pred_results, local_results)):
         local_result.fillna(df_filler, inplace=True)
         pred_result = pred_result.mul(pred_coeff).sum(axis=axis)
@@ -83,7 +83,7 @@ def _add_pred_results(
         weights_df.ffill(inplace=True)
         weights_df.fillna(0, inplace=True)
 
-        weights_df = weight**weights_df
+        weights_df = weight ** weights_df
         pred_df = weights_df.mul(pred_result, axis=combine_axis)
 
         new_locals.append(local_result.add(pred_df, axis=combine_axis))
@@ -143,7 +143,7 @@ def _combine_var(
         local_count2_data,
     ) = local_results
     if pred_results is None:
-        return (local_sum_square - local_sum_data**2 / local_count_data) / (
+        return (local_sum_square - local_sum_data ** 2 / local_count_data) / (
             local_count_data - local_count2_data / local_count_data
         )
 
@@ -170,7 +170,7 @@ def _combine_var(
         alpha_data=alpha_data,
     )
 
-    return (local_sum_square - local_sum_data**2 / local_count_data) / (
+    return (local_sum_square - local_sum_data ** 2 / local_count_data) / (
         local_count_data - local_count2_data / local_count_data
     )
 
@@ -357,7 +357,7 @@ def _execute_cumsum2(cls, op: "DataFrameEwmAgg", in_data):
 
             alpha_sum, _ = op._execute_cum_alpha_coeff(op, in_data, 1)
             cumsum, _ = op._execute_cumsum(op, in_data)
-            result = alpha_sum * data + cumsum**2 / alpha_sum
+            result = alpha_sum * data + cumsum ** 2 / alpha_sum
 
             if op.output_agg:
                 summary = result.ffill()[-1:]
diff --git a/mars/dataframe/window/expanding/aggregation.py b/mars/dataframe/window/expanding/aggregation.py
index 53fdc13379..56233c709b 100644
--- a/mars/dataframe/window/expanding/aggregation.py
+++ b/mars/dataframe/window/expanding/aggregation.py
@@ -94,10 +94,10 @@ def _combine_var(pred_results, local_results, axis=0):
     pred_sum_data, pred_count_data, pred_var_data = pred_results
 
     local_sum_square = (
-        local_count_data * local_var_data + local_sum_data**2 / local_count_data
+        local_count_data * local_var_data + local_sum_data ** 2 / local_count_data
     )
     pred_sum_square = (
-        pred_count_data * pred_var_data + pred_sum_data**2 / pred_count_data
+        pred_count_data * pred_var_data + pred_sum_data ** 2 / pred_count_data
     )
 
     local_sum_square, local_sum_data, local_count_data = _add_pred_results(
@@ -106,7 +106,7 @@ def _combine_var(pred_results, local_results, axis=0):
         axis=axis,
     )
 
-    return (local_sum_square - local_sum_data**2 / local_count_data) / (
+    return (local_sum_square - local_sum_data ** 2 / local_count_data) / (
         local_count_data - 1
     )
 
diff --git a/mars/deploy/kubedl/client.py b/mars/deploy/kubedl/client.py
index 12b2ab611e..414a2821b5 100644
--- a/mars/deploy/kubedl/client.py
+++ b/mars/deploy/kubedl/client.py
@@ -329,16 +329,16 @@ def new_cluster(
     image=None,
     scheduler_num=1,
     scheduler_cpu=2,
-    scheduler_mem=4 * 1024**3,
+    scheduler_mem=4 * 1024 ** 3,
     worker_num=1,
     worker_cpu=8,
-    worker_mem=32 * 1024**3,
+    worker_mem=32 * 1024 ** 3,
     worker_spill_paths=None,
     worker_cache_mem="45%",
     min_worker_num=None,
     web_num=1,
     web_cpu=1,
-    web_mem=4 * 1024**3,
+    web_mem=4 * 1024 ** 3,
     slb_endpoint=None,
     verify_ssl=True,
     job_name=None,
diff --git a/mars/deploy/kubedl/config.py b/mars/deploy/kubedl/config.py
index 71ef6f9d19..8edaa972c6 100644
--- a/mars/deploy/kubedl/config.py
+++ b/mars/deploy/kubedl/config.py
@@ -116,9 +116,7 @@ def build(self):
             "replicas": int(self._replicas),
             "restartPolicy": "Never",
             "template": {
-                "metadata": {
-                    "labels": {"mars/service-type": self._name},
-                },
+                "metadata": {"labels": {"mars/service-type": self._name},},
                 "spec": self.build_template_spec(),
             },
         }
diff --git a/mars/deploy/kubernetes/config.py b/mars/deploy/kubernetes/config.py
index 665c3e4cbc..5304cead1f 100644
--- a/mars/deploy/kubernetes/config.py
+++ b/mars/deploy/kubernetes/config.py
@@ -134,12 +134,7 @@ def __init__(self, name):
     def build(self):
         return {
             "kind": "Namespace",
-            "metadata": {
-                "name": self._name,
-                "labels": {
-                    "name": self._name,
-                },
-            },
+            "metadata": {"name": self._name, "labels": {"name": self._name,},},
         }
 
 
@@ -163,9 +158,7 @@ def build(self):
             "kind": "Service",
             "metadata": {
                 "name": self._name,
-                "labels": {
-                    "mars/service-name": self._name,
-                },
+                "labels": {"mars/service-name": self._name,},
             },
             "spec": _remove_nones(
                 {
@@ -426,9 +419,7 @@ def build_container(self):
         }
         lifecycle_dict = _remove_nones(
             {
-                "preStop": {
-                    "exec": {"command": self._pre_stop_command},
-                }
+                "preStop": {"exec": {"command": self._pre_stop_command},}
                 if self._pre_stop_command
                 else None,
             }
@@ -463,15 +454,11 @@ def build_template_spec(self):
     def build(self):
         return {
             "kind": self._kind,
-            "metadata": {
-                "name": self._name,
-            },
+            "metadata": {"name": self._name,},
             "spec": {
                 "replicas": int(self._replicas),
                 "template": {
-                    "metadata": {
-                        "labels": _remove_nones(self._labels) or None,
-                    },
+                    "metadata": {"labels": _remove_nones(self._labels) or None,},
                     "spec": self.build_template_spec(),
                 },
             },
diff --git a/mars/deploy/kubernetes/tests/test_config.py b/mars/deploy/kubernetes/tests/test_config.py
index 09dce08d37..af1ab982b7 100644
--- a/mars/deploy/kubernetes/tests/test_config.py
+++ b/mars/deploy/kubernetes/tests/test_config.py
@@ -56,13 +56,13 @@ def test_supervisor_object():
     assert supervisor_config_dict["spec"]["replicas"] == 1
 
     container_dict = supervisor_config_dict["spec"]["template"]["spec"]["containers"][0]
-    assert int(container_dict["resources"]["requests"]["memory"]) == 10 * 1024**3
+    assert int(container_dict["resources"]["requests"]["memory"]) == 10 * 1024 ** 3
 
     container_envs = dict((p["name"], p) for p in container_dict["env"])
     assert container_envs["TEST_ENV"]["value"] == "test_val"
     assert container_envs["MKL_NUM_THREADS"]["value"] == "2"
     assert container_envs["MARS_CPU_TOTAL"]["value"] == "2"
-    assert int(container_envs["MARS_MEMORY_TOTAL"]["value"]) == 10 * 1024**3
+    assert int(container_envs["MARS_MEMORY_TOTAL"]["value"]) == 10 * 1024 ** 3
     assert container_envs["MARS_LOAD_MODULES"]["value"] == "mars.test_mod"
 
 
@@ -70,7 +70,7 @@ def test_worker_object():
     worker_config_dict = MarsWorkersConfig(
         4,
         cpu=2,
-        memory=10 * 1024**3,
+        memory=10 * 1024 ** 3,
         limit_resources=True,
         memory_limit_ratio=2,
         spill_volumes=[
@@ -86,13 +86,13 @@ def test_worker_object():
     assert worker_config_dict["spec"]["replicas"] == 4
 
     container_dict = worker_config_dict["spec"]["template"]["spec"]["containers"][0]
-    assert int(container_dict["resources"]["requests"]["memory"]) == 10 * 1024**3
-    assert int(container_dict["resources"]["limits"]["memory"]) == 20 * 1024**3
+    assert int(container_dict["resources"]["requests"]["memory"]) == 10 * 1024 ** 3
+    assert int(container_dict["resources"]["limits"]["memory"]) == 20 * 1024 ** 3
 
     container_envs = dict((p["name"], p) for p in container_dict["env"])
     assert container_envs["MKL_NUM_THREADS"]["value"] == "2"
     assert container_envs["MARS_CPU_TOTAL"]["value"] == "2"
-    assert int(container_envs["MARS_MEMORY_TOTAL"]["value"]) == 10 * 1024**3
+    assert int(container_envs["MARS_MEMORY_TOTAL"]["value"]) == 10 * 1024 ** 3
     assert container_envs["MARS_LOAD_MODULES"]["value"] == "mars.test_mod"
     assert set(container_envs["MARS_SPILL_DIRS"]["value"].split(":")) == {
         "/tmp/empty",
@@ -112,7 +112,7 @@ def test_worker_object():
     worker_config_dict = MarsWorkersConfig(
         4,
         cpu=2,
-        memory=10 * 1024**3,
+        memory=10 * 1024 ** 3,
         limit_resources=False,
         spill_volumes=[
             "/tmp/spill_vol",
diff --git a/mars/deploy/oscar/local.py b/mars/deploy/oscar/local.py
index fba3c8c6c7..6398607d27 100644
--- a/mars/deploy/oscar/local.py
+++ b/mars/deploy/oscar/local.py
@@ -303,11 +303,7 @@ def __init__(self: ClientType, cluster: ClusterType, session: AbstractSession):
         self.session = session
 
     @classmethod
-    async def create(
-        cls,
-        cluster: LocalCluster,
-        timeout: float = None,
-    ) -> ClientType:
+    async def create(cls, cluster: LocalCluster, timeout: float = None,) -> ClientType:
         session = await _new_session(
             cluster.external_address,
             backend=cluster.backend,
diff --git a/mars/deploy/oscar/ray.py b/mars/deploy/oscar/ray.py
index 5f4d4ae650..16bfd8f5b5 100644
--- a/mars/deploy/oscar/ray.py
+++ b/mars/deploy/oscar/ray.py
@@ -308,10 +308,10 @@ async def _reconstruct_worker():
 async def new_cluster(
     cluster_name: str = None,
     supervisor_cpu: int = 1,
-    supervisor_mem: int = 1 * 1024**3,
+    supervisor_mem: int = 1 * 1024 ** 3,
     worker_num: int = 1,
     worker_cpu: int = 2,
-    worker_mem: int = 2 * 1024**3,
+    worker_mem: int = 2 * 1024 ** 3,
     backend: str = None,
     config: Union[str, Dict] = None,
     **kwargs,
@@ -407,10 +407,10 @@ def __init__(
         self,
         cluster_name: str,
         supervisor_cpu: Union[int, float] = 1,
-        supervisor_mem: int = 1 * 1024**3,
+        supervisor_mem: int = 1 * 1024 ** 3,
         worker_num: int = 1,
         worker_cpu: Union[int, float] = 2,
-        worker_mem: int = 2 * 1024**3,
+        worker_mem: int = 2 * 1024 ** 3,
         backend: str = None,
         config: Union[str, Dict] = None,
         n_supervisor_process: int = DEFAULT_SUPERVISOR_SUB_POOL_NUM,
diff --git a/mars/deploy/oscar/session.py b/mars/deploy/oscar/session.py
index 218bdce7ad..e041902fb7 100644
--- a/mars/deploy/oscar/session.py
+++ b/mars/deploy/oscar/session.py
@@ -287,11 +287,7 @@ async def _update_progress(self, task_id: str, progress: Progress):
             break
 
     async def _run_in_background(
-        self,
-        tileables: list,
-        task_id: str,
-        progress: Progress,
-        profiling: Profiling,
+        self, tileables: list, task_id: str, progress: Progress, profiling: Profiling,
     ):
         with enter_mode(build=True, kernel=True):
             # wait for task to finish
@@ -384,9 +380,7 @@ async def execute(self, *tileables, **kwargs) -> ExecutionInfo:
 
         # submit task
         task_id = await self._task_api.submit_tileable_graph(
-            tileable_graph,
-            fuse_enabled=fuse_enabled,
-            extra_config=extra_config,
+            tileable_graph, fuse_enabled=fuse_enabled, extra_config=extra_config,
         )
 
         progress = Progress()
@@ -497,8 +491,7 @@ async def fetch(self, *tileables, **kwargs) -> list:
                 chunks.append(chunk)
                 get_chunk_metas.append(
                     self._meta_api.get_chunk_meta.delay(
-                        chunk.key,
-                        fields=fetcher.required_meta_keys,
+                        chunk.key, fields=fetcher.required_meta_keys,
                     )
                 )
         indexes = (
diff --git a/mars/deploy/oscar/tests/test_clean_up_and_restore_func.py b/mars/deploy/oscar/tests/test_clean_up_and_restore_func.py
index a681f27267..3c7edfc324 100644
--- a/mars/deploy/oscar/tests/test_clean_up_and_restore_func.py
+++ b/mars/deploy/oscar/tests/test_clean_up_and_restore_func.py
@@ -127,13 +127,13 @@ def test_mars_backend_clean_up_and_restore_func(setup):
     sess = new_test_session(default=True, config=config)
 
     cols = [chr(ord("A") + i) for i in range(10)]
-    df_raw = pd.DataFrame(dict((c, [i**2 for i in range(20)]) for c in cols))
+    df_raw = pd.DataFrame(dict((c, [i ** 2 for i in range(20)]) for c in cols))
     df = md.DataFrame(df_raw, chunk_size=5)
 
     x_small = pd.Series([i for i in range(10)])
     y_small = pd.Series([i for i in range(10)])
-    x_large = pd.Series([i for i in range(10**4)])
-    y_large = pd.Series([i for i in range(10**4)])
+    x_large = pd.Series([i for i in range(10 ** 4)])
+    y_large = pd.Series([i for i in range(10 ** 4)])
 
     def closure_small(z):
         return pd.concat([x_small, y_small], ignore_index=True)
@@ -162,15 +162,15 @@ def test_clean_up_and_restore_callable(setup, multiplier):
     sess = new_test_session(default=True, config=config)
 
     cols = [chr(ord("A") + i) for i in range(10)]
-    df_raw = pd.DataFrame(dict((c, [i**2 for i in range(20)]) for c in cols))
+    df_raw = pd.DataFrame(dict((c, [i ** 2 for i in range(20)]) for c in cols))
     df = md.DataFrame(df_raw, chunk_size=5)
 
     class callable_df:
         __slots__ = "x", "__dict__"
 
         def __init__(self, multiplier: int = 1):
-            self.x = pd.Series([i for i in range(10**multiplier)])
-            self.y = pd.Series([i for i in range(10**multiplier)])
+            self.x = pd.Series([i for i in range(10 ** multiplier)])
+            self.y = pd.Series([i for i in range(10 ** multiplier)])
 
         def __call__(self, pdf):
             return pd.concat([self.x, self.y], ignore_index=True)
diff --git a/mars/deploy/oscar/tests/test_local.py b/mars/deploy/oscar/tests/test_local.py
index f72f5326b9..b16363b267 100644
--- a/mars/deploy/oscar/tests/test_local.py
+++ b/mars/deploy/oscar/tests/test_local.py
@@ -371,11 +371,11 @@ async def test_execute_describe(create_cluster):
 async def test_execute_apply_closure(create_cluster):
     # DataFrame
     cols = [chr(ord("A") + i) for i in range(10)]
-    raw = pd.DataFrame(dict((c, [i**2 for i in range(20)]) for c in cols))
+    raw = pd.DataFrame(dict((c, [i ** 2 for i in range(20)]) for c in cols))
     df = md.DataFrame(raw, chunk_size=5)
 
-    x1 = pd.Series([i for i in range(10**4)])
-    y1 = pd.Series([i for i in range(10**4)])
+    x1 = pd.Series([i for i in range(10 ** 4)])
+    y1 = pd.Series([i for i in range(10 ** 4)])
 
     def dataframe_closure(z1):
         return pd.concat([x1, y1], ignore_index=True)
@@ -394,7 +394,7 @@ def dataframe_closure(z1):
 
     # Series
     idxes = [chr(ord("A") + i) for i in range(20)]
-    s_raw = pd.Series([i**2 for i in range(20)], index=idxes)
+    s_raw = pd.Series([i ** 2 for i in range(20)], index=idxes)
 
     series = md.Series(s_raw, chunk_size=5)
 
@@ -434,15 +434,15 @@ def series_closure(z2):
 async def test_execute_callable_closure(create_cluster, multiplier):
     # DataFrame
     cols = [chr(ord("A") + i) for i in range(10)]
-    raw = pd.DataFrame(dict((c, [i**2 for i in range(20)]) for c in cols))
+    raw = pd.DataFrame(dict((c, [i ** 2 for i in range(20)]) for c in cols))
     df = md.DataFrame(raw, chunk_size=5)
 
     class callable_df:
         __slots__ = "x", "__dict__"
 
        def __init__(self, multiplier: int = 1):
-            self.x = pd.Series([i for i in range(10**multiplier)])
-            self.y = pd.Series([i for i in range(10**multiplier)])
+            self.x = pd.Series([i for i in range(10 ** multiplier)])
+            self.y = pd.Series([i for i in range(10 ** multiplier)])
 
         def __call__(self, pdf):
             return pd.concat([self.x, self.y], ignore_index=True)
@@ -1059,12 +1059,7 @@ async def speculative_cluster():
     config["scheduling"]["speculation"]["max_concurrent_run"] = 10
     config["scheduling"]["subtask_cancel_timeout"] = 0.1
     config["scheduling"]["enable_kill_slot"] = True
-    client = await new_cluster(
-        config=config,
-        n_worker=5,
-        n_cpu=10,
-        use_uvloop=False,
-    )
+    client = await new_cluster(config=config, n_worker=5, n_cpu=10, use_uvloop=False,)
     async with client:
         yield client
diff --git a/mars/deploy/oscar/tests/test_ray.py b/mars/deploy/oscar/tests/test_ray.py
index 17e37aaa3c..a717dd4792 100644
--- a/mars/deploy/oscar/tests/test_ray.py
+++ b/mars/deploy/oscar/tests/test_ray.py
@@ -78,10 +78,10 @@ async def create_cluster(request):
     ray_config = _load_config(CONFIG_FILE)
     ray_config.update(param.get("config", {}))
     client = await new_cluster(
-        supervisor_mem=1 * 1024**3,
+        supervisor_mem=1 * 1024 ** 3,
         worker_num=2,
         worker_cpu=2,
-        worker_mem=1 * 1024**3,
+        worker_mem=1 * 1024 ** 3,
         config=ray_config,
     )
     async with client:
diff --git a/mars/deploy/oscar/tests/test_ray_client.py b/mars/deploy/oscar/tests/test_ray_client.py
index 43bc3bb99a..ddd7018fa3 100644
--- a/mars/deploy/oscar/tests/test_ray_client.py
+++ b/mars/deploy/oscar/tests/test_ray_client.py
@@ -28,11 +28,7 @@
 
 @require_ray
 @pytest.mark.parametrize(
-    "backend",
-    [
-        "mars",
-        "ray",
-    ],
+    "backend", ["mars", "ray",],
 )
 def test_ray_client(backend):
     server_code = """import time
diff --git a/mars/deploy/oscar/tests/test_ray_cluster_standalone.py b/mars/deploy/oscar/tests/test_ray_cluster_standalone.py
index 3ff28b355d..50a3c3f806 100644
--- a/mars/deploy/oscar/tests/test_ray_cluster_standalone.py
+++ b/mars/deploy/oscar/tests/test_ray_cluster_standalone.py
@@ -43,11 +43,7 @@ def test_new_cluster_in_ray(stop_ray):
 
 @require_ray
 @pytest.mark.parametrize(
-    "backend",
-    [
-        "mars",
-        "ray",
-    ],
+    "backend", ["mars", "ray",],
 )
 def test_new_ray_session(stop_ray, backend):
     new_ray_session_test(backend)
@@ -55,7 +51,7 @@ def test_new_ray_session(stop_ray, backend):
 
 def new_ray_session_test(backend):
     session = new_ray_session(
-        session_id="abc", worker_num=2, worker_mem=512 * 1024**2, backend=backend
+        session_id="abc", worker_num=2, worker_mem=512 * 1024 ** 2, backend=backend
     )
     mt.random.RandomState(0).rand(100, 5).sum().execute()
     session.execute(mt.random.RandomState(0).rand(100, 5).sum())
@@ -64,7 +60,7 @@ def new_ray_session_test(backend):
         session_id="abcd",
         worker_num=2,
         default=True,
-        worker_mem=512 * 1024**2,
+        worker_mem=512 * 1024 ** 2,
         backend=backend,
     )
     session.execute(mt.random.RandomState(0).rand(100, 5).sum())
@@ -106,10 +102,10 @@ async def test_optional_supervisor_node(ray_start_regular, test_option):
     config["cluster"]["ray"]["supervisor"]["sub_pool_num"] = supervisor_sub_pool_num
     client = await new_cluster(
         "test_cluster",
-        supervisor_mem=1 * 1024**3,
+        supervisor_mem=1 * 1024 ** 3,
         worker_num=2,
         worker_cpu=2,
-        worker_mem=1 * 1024**3,
+        worker_mem=1 * 1024 ** 3,
         config=config,
     )
     async with client:
diff --git a/mars/deploy/oscar/tests/test_ray_dag_failover.py b/mars/deploy/oscar/tests/test_ray_dag_failover.py
index 75d550a7c5..9384e2420b 100644
--- a/mars/deploy/oscar/tests/test_ray_dag_failover.py
+++ b/mars/deploy/oscar/tests/test_ray_dag_failover.py
@@ -34,9 +34,7 @@
 
 @require_ray
 @pytest.mark.parametrize(
-    "ray_large_cluster",
-    [{"num_nodes": 0}],
-    indirect=True,
+    "ray_large_cluster", [{"num_nodes": 0}], indirect=True,
 )
 @pytest.mark.parametrize("reconstruction_enabled", [True, False])
 @pytest.mark.skipif(
@@ -65,7 +63,7 @@ def test_basic_object_reconstruction(
     )
     ray.init(address=cluster.address)
     # Node to place the initial object.
-    node_to_kill = cluster.add_node(num_cpus=1, object_store_memory=10**8)
+    node_to_kill = cluster.add_node(num_cpus=1, object_store_memory=10 ** 8)
     mars.new_session(
         backend="ray",
         config={"scheduling.subtask_max_retries": subtask_max_retries},
@@ -82,7 +80,7 @@ def test_basic_object_reconstruction(
     head5 = df2.head(5).to_pandas()
 
     cluster.remove_node(node_to_kill, allow_graceful=False)
-    node_to_kill = cluster.add_node(num_cpus=1, object_store_memory=10**8)
+    node_to_kill = cluster.add_node(num_cpus=1, object_store_memory=10 ** 8)
 
     # use a dependent_task to avoid fetch lost objects to local
     @ray.remote
@@ -102,7 +100,7 @@ def dependent_task(x):
 
     # Losing the object a second time will cause reconstruction to fail because
     # we have reached the max task retries.
     cluster.remove_node(node_to_kill, allow_graceful=False)
-    cluster.add_node(num_cpus=1, object_store_memory=10**8)
+    cluster.add_node(num_cpus=1, object_store_memory=10 ** 8)
 
     if reconstruction_enabled:
         with pytest.raises(ObjectReconstructionFailedMaxAttemptsExceededError):
diff --git a/mars/deploy/oscar/tests/test_ray_dag_oscar.py b/mars/deploy/oscar/tests/test_ray_dag_oscar.py
index 8238a34f7b..f1900c4db8 100644
--- a/mars/deploy/oscar/tests/test_ray_dag_oscar.py
+++ b/mars/deploy/oscar/tests/test_ray_dag_oscar.py
@@ -31,10 +31,10 @@ async def create_cluster(request):
     ray_config = _load_config(CONFIG_FILE)
     ray_config.update(param.get("config", {}))
     client = await new_cluster(
-        supervisor_mem=1 * 1024**3,
+        supervisor_mem=1 * 1024 ** 3,
         worker_num=2,
         worker_cpu=2,
-        worker_mem=1 * 1024**3,
+        worker_mem=1 * 1024 ** 3,
         backend="ray",
         config=ray_config,
     )
diff --git a/mars/deploy/oscar/tests/test_ray_fault_injection.py b/mars/deploy/oscar/tests/test_ray_fault_injection.py
index 60387b9ac6..c31b53b310 100644
--- a/mars/deploy/oscar/tests/test_ray_fault_injection.py
+++ b/mars/deploy/oscar/tests/test_ray_fault_injection.py
@@ -38,10 +38,7 @@
     "third_party_modules": ["mars.services.tests.fault_injection_patch"],
 }
 SUBTASK_RERUN_CONFIG = {
-    "scheduling": {
-        "subtask_max_retries": 2,
-        "subtask_max_reschedules": 2,
-    }
+    "scheduling": {"subtask_max_retries": 2, "subtask_max_reschedules": 2,}
 }
 
 
@@ -52,10 +49,7 @@ async def fault_cluster(request):
     ray_config.update(FAULT_INJECTION_CONFIG)
     ray_config.update(param.get("config", {}))
     client = await new_cluster(
-        worker_num=2,
-        worker_cpu=2,
-        worker_mem=1 * 1024**3,
-        config=ray_config,
+        worker_num=2, worker_cpu=2, worker_mem=1 * 1024 ** 3, config=ray_config,
     )
     async with client:
         yield client
diff --git a/mars/deploy/oscar/tests/test_ray_load_modules.py b/mars/deploy/oscar/tests/test_ray_load_modules.py
index 46b430d0a5..9514659ebd 100644
--- a/mars/deploy/oscar/tests/test_ray_load_modules.py
+++ b/mars/deploy/oscar/tests/test_ray_load_modules.py
@@ -44,10 +44,10 @@ async def create_cluster(request):
     ray_config = _load_config(CONFIG_FILE)
     ray_config.update(param.get("config", {}))
     client = await new_cluster(
-        supervisor_mem=1 * 1024**3,
+        supervisor_mem=1 * 1024 ** 3,
         worker_num=2,
         worker_cpu=2,
-        worker_mem=1 * 1024**3,
+        worker_mem=1 * 1024 ** 3,
         config=ray_config,
     )
     async with client:
@@ -77,10 +77,7 @@ async def test_load_third_party_modules(ray_start_regular, config_exception):
     config["third_party_modules"] = third_party_modules_config
     with expected_exception:
         await new_cluster(
-            worker_num=1,
-            worker_cpu=1,
-            worker_mem=1 * 1024**3,
-            config=config,
+            worker_num=1, worker_cpu=1, worker_mem=1 * 1024 ** 3, config=config,
         )
 
 
@@ -121,10 +118,10 @@ async def test_load_third_party_modules_from_config(
     ray_start_regular, cleanup_third_party_modules_output  # noqa: F811
 ):
     client = await new_cluster(
-        supervisor_mem=1 * 1024**3,
+        supervisor_mem=1 * 1024 ** 3,
         worker_num=1,
         worker_cpu=1,
-        worker_mem=1 * 1024**3,
+        worker_mem=1 * 1024 ** 3,
         config=CONFIG_THIRD_PARTY_MODULES_TEST_FILE,
     )
     async with client:
diff --git a/mars/deploy/oscar/tests/test_ray_scheduling.py b/mars/deploy/oscar/tests/test_ray_scheduling.py
index 97c8cb3c8f..a0f55de089 100644
--- a/mars/deploy/oscar/tests/test_ray_scheduling.py
+++ b/mars/deploy/oscar/tests/test_ray_scheduling.py
@@ -46,8 +46,8 @@ async def speculative_cluster():
         "test_cluster",
         worker_num=5,
         worker_cpu=2,
-        worker_mem=512 * 1024**2,
-        supervisor_mem=100 * 1024**2,
+        worker_mem=512 * 1024 ** 2,
+        supervisor_mem=100 * 1024 ** 2,
         config={
             "scheduling": {
                 "speculation": {
@@ -81,7 +81,7 @@ async def test_task_speculation_execution(ray_large_cluster, speculative_cluster):
 @require_ray
 @pytest.mark.asyncio
 async def test_request_worker(ray_large_cluster):
-    worker_cpu, worker_mem = 1, 100 * 1024**2
+    worker_cpu, worker_mem = 1, 100 * 1024 ** 2
     client = await new_cluster(
         worker_num=0, worker_cpu=worker_cpu, worker_mem=worker_mem
     )
@@ -111,7 +111,7 @@ async def test_request_worker(ray_large_cluster):
 @require_ray
 @pytest.mark.asyncio
 async def test_reconstruct_worker(ray_large_cluster):
-    worker_cpu, worker_mem = 1, 100 * 1024**2
+    worker_cpu, worker_mem = 1, 100 * 1024 ** 2
     client = await new_cluster(
         worker_num=0, worker_cpu=worker_cpu, worker_mem=worker_mem
     )
@@ -163,8 +163,8 @@ async def test_auto_scale_out(ray_large_cluster, init_workers: int):
     client = await new_cluster(
         worker_num=init_workers,
         worker_cpu=2,
-        worker_mem=200 * 1024**2,
-        supervisor_mem=1 * 1024**3,
+        worker_mem=200 * 1024 ** 2,
+        supervisor_mem=1 * 1024 ** 3,
         config={
             "scheduling.autoscale.enabled": True,
             "scheduling.autoscale.scheduler_backlog_timeout": 1,
@@ -209,8 +209,8 @@ async def test_auto_scale_in(ray_large_cluster):
     client = await new_cluster(
         worker_num=0,
         worker_cpu=2,
-        worker_mem=200 * 1024**2,
-        supervisor_mem=1 * 1024**3,
+        worker_mem=200 * 1024 ** 2,
+        supervisor_mem=1 * 1024 ** 3,
         config=config,
     )
     async with client:
@@ -243,8 +243,8 @@ async def test_ownership_when_scale_in(ray_large_cluster):
     client = await new_cluster(
         worker_num=0,
         worker_cpu=2,
-        worker_mem=1 * 1024**3,
-        supervisor_mem=200 * 1024**2,
+        worker_mem=1 * 1024 ** 3,
+        supervisor_mem=200 * 1024 ** 2,
         config={
             "scheduling.autoscale.enabled": True,
             "scheduling.autoscale.scheduler_check_interval": 0.1,
@@ -273,10 +273,7 @@ def f(pdf, latch):
             ray.get(latch.wait.remote())
             return pdf
 
-        df = df.map_chunk(
-            f,
-            args=(latch_actor,),
-        )
+        df = df.map_chunk(f, args=(latch_actor,),)
         info = df.execute(wait=False)
         while await autoscaler_ref.get_dynamic_worker_nums() <= 1:
             logger.info("Waiting workers to be created.")
diff --git a/mars/deploy/yarn/tests/test_config.py b/mars/deploy/yarn/tests/test_config.py
index 3c175ad5db..0537950287 100644
--- a/mars/deploy/yarn/tests/test_config.py
+++ b/mars/deploy/yarn/tests/test_config.py
@@ -63,7 +63,7 @@ def test_supervisor_config():
     assert config_envs["TEST_ENV"] == "test_val"
     assert config_envs["MKL_NUM_THREADS"] == "2"
     assert config_envs["MARS_CPU_TOTAL"] == "2"
-    assert int(config_envs["MARS_MEMORY_TOTAL"]) == 10 * 1024**3
+    assert int(config_envs["MARS_MEMORY_TOTAL"]) == 10 * 1024 ** 3
     assert config_envs["MARS_LOAD_MODULES"] == "mars.test_mod"
 
     config = MarsSupervisorConfig(
diff --git a/mars/learn/cluster/_k_means_elkan_iter.py b/mars/learn/cluster/_k_means_elkan_iter.py
index 55aa134df2..f01088d40c 100644
--- a/mars/learn/cluster/_k_means_elkan_iter.py
+++ b/mars/learn/cluster/_k_means_elkan_iter.py
@@ -402,9 +402,15 @@ def tile(cls, op: "KMeansElkanUpdate"):
         out_chunks = [list() for _ in range(op.output_limit)]
         for i in range(x.chunk_shape[0]):
             x_chunk = x.cix[i, 0]
-            sample_weight_chunk = sample_weight.cix[i,]
-            labels_chunk = labels.cix[i,]
-            upper_bounds_chunk = upper_bounds.cix[i,]
+            sample_weight_chunk = sample_weight.cix[
+                i,
+            ]
+            labels_chunk = labels.cix[
+                i,
+            ]
+            upper_bounds_chunk = upper_bounds.cix[
+                i,
+            ]
             lower_bounds_chunk = lower_bounds.cix[i, 0]
             chunk_op = op.copy().reset_key()
             chunk_op.stage = OperandStage.map
diff --git a/mars/learn/cluster/_k_means_lloyd_iter.py b/mars/learn/cluster/_k_means_lloyd_iter.py
index 40801684ae..755c97cb9a 100644
--- a/mars/learn/cluster/_k_means_lloyd_iter.py
+++ b/mars/learn/cluster/_k_means_lloyd_iter.py
@@ -148,9 +148,15 @@ def tile(cls, op: "KMeansLloydUpdate"):
         labels_chunks, centers_new_chunks, weight_in_clusters_chunks = [], [], []
         for i in range(x.chunk_shape[0]):
             x_chunk = x.cix[i, 0]
-            sample_weight_chunk = sample_weight.cix[i,]
-            x_squared_norms_chunk = x_squared_norms.cix[i,]
-            labels_chunk = labels.cix[i,]
+            sample_weight_chunk = sample_weight.cix[
+                i,
+            ]
+            x_squared_norms_chunk = x_squared_norms.cix[
+                i,
+            ]
+            labels_chunk = labels.cix[
+                i,
+            ]
             chunk_op = op.copy().reset_key()
             chunk_op.stage = OperandStage.map
             chunk_kws = [
diff --git a/mars/learn/cluster/_kmeans.py b/mars/learn/cluster/_kmeans.py
index 6f6c196488..b97e0bd8bd 100644
--- a/mars/learn/cluster/_kmeans.py
+++ b/mars/learn/cluster/_kmeans.py
@@ -289,7 +289,7 @@ def _kmeans_single_elkan(
             inertia = _inertia(X, sample_weight, centers, labels)
             to_runs.append(inertia)
 
-        center_shift_tot = (center_shift**2).sum()
+        center_shift_tot = (center_shift ** 2).sum()
         to_runs.append(center_shift_tot)
 
         mt.ExecutableTuple(to_runs).execute(session=session, **(run_kwargs or dict()))
@@ -391,7 +391,7 @@ def _kmeans_single_lloyd(
             inertia = _inertia(X, sample_weight, centers, labels)
             to_runs.append(inertia)
 
-        center_shift_tot = (center_shift**2).sum()
+        center_shift_tot = (center_shift ** 2).sum()
         to_runs.append(center_shift_tot)
 
         mt.ExecutableTuple(to_runs).execute(session=session, **(run_kwargs or dict()))
diff --git a/mars/learn/cluster/tests/test_k_means.py b/mars/learn/cluster/tests/test_k_means.py
index f86aa8d3b1..e48bf4819c 100644
--- a/mars/learn/cluster/tests/test_k_means.py
+++ b/mars/learn/cluster/tests/test_k_means.py
@@ -487,10 +487,7 @@ def test_k_means_function(setup):
     # check warning when centers are passed
     with pytest.warns(RuntimeWarning):
         k_means(
-            X,
-            n_clusters=n_clusters,
-            sample_weight=None,
-            init=centers,
+            X, n_clusters=n_clusters, sample_weight=None, init=centers,
         )
 
     # to many clusters desired
diff --git a/mars/learn/contrib/lightgbm/tests/test_classifier.py b/mars/learn/contrib/lightgbm/tests/test_classifier.py
index 28fd623421..9a887aefca 100644
--- a/mars/learn/contrib/lightgbm/tests/test_classifier.py
+++ b/mars/learn/contrib/lightgbm/tests/test_classifier.py
@@ -51,10 +51,7 @@ async def create_cluster():
 
     start_method = os.environ.get("POOL_START_METHOD", None)
     client = await new_cluster(
-        subprocess_start_method=start_method,
-        n_worker=2,
-        n_cpu=4,
-        use_uvloop=False,
+        subprocess_start_method=start_method, n_worker=2, n_cpu=4, use_uvloop=False,
     )
     async with client:
         yield client
diff --git a/mars/learn/contrib/statsmodels/predict.py b/mars/learn/contrib/statsmodels/predict.py
index 8703a2525c..66071860ef 100644
--- a/mars/learn/contrib/statsmodels/predict.py
+++ b/mars/learn/contrib/statsmodels/predict.py
@@ -64,10 +64,7 @@ def __call__(self, exog):
             )
         else:
             self._output_types = [OutputType.tensor]
-            kwargs = dict(
-                shape=exog.shape[:1],
-                dtype=np.dtype("float"),
-            )
+            kwargs = dict(shape=exog.shape[:1], dtype=np.dtype("float"),)
         return self.new_tileable([exog], **kwargs)
 
     @classmethod
@@ -89,9 +86,7 @@ def tile(cls, op: "StatsModelsPredict"):
             )
         else:
             kwargs = dict(
-                index=in_chunk.index[:1],
-                shape=in_chunk.shape[:1],
-                dtype=out.dtype,
+                index=in_chunk.index[:1], shape=in_chunk.shape[:1], dtype=out.dtype,
             )
 
         new_op = op.copy().reset_key()
diff --git a/mars/learn/contrib/xgboost/core.py b/mars/learn/contrib/xgboost/core.py
index 5f9e96f95c..abc807d213 100644
--- a/mars/learn/contrib/xgboost/core.py
+++ b/mars/learn/contrib/xgboost/core.py
@@ -140,10 +140,7 @@ def validate_or_none(meta: Optional[List], name: str) -> List:
         else:
             if any(
                 meta is not None
-                for meta in [
-                    sample_weight_eval_set,
-                    base_margin_eval_set,
-                ]
+                for meta in [sample_weight_eval_set, base_margin_eval_set,]
             ):
                 raise ValueError(
                     "`eval_set` is not set but one of the other evaluation meta info is "
diff --git a/mars/learn/contrib/xgboost/dmatrix.py b/mars/learn/contrib/xgboost/dmatrix.py
index 770df7b705..7569f14275 100644
--- a/mars/learn/contrib/xgboost/dmatrix.py
+++ b/mars/learn/contrib/xgboost/dmatrix.py
@@ -117,7 +117,9 @@ def _get_collocated(
             for type_name, inp in zip(types[1:], [label, weight, base_margin]):
                 if inp is None:
                     continue
-                inp_chunk = inp.cix[i,]
+                inp_chunk = inp.cix[
+                    i,
+                ]
                 setattr(chunk_op, type_name, inp_chunk)
                 inps.append(inp_chunk)
                 kw = cls._get_kw(inp_chunk)
diff --git a/mars/learn/contrib/xgboost/start_tracker.py b/mars/learn/contrib/xgboost/start_tracker.py
index feadbcbdb3..daebef3a5e 100644
--- a/mars/learn/contrib/xgboost/start_tracker.py
+++ b/mars/learn/contrib/xgboost/start_tracker.py
@@ -29,9 +29,7 @@ class StartTracker(LearnOperand, LearnOperandMixin):
 
     def __init__(self, output_types=None, pure_depends=None, **kw):
         super().__init__(
-            _output_types=output_types,
-            _pure_depends=pure_depends,
-            **kw,
+            _output_types=output_types, _pure_depends=pure_depends, **kw,
         )
         if self.output_types is None:
             self.output_types = [OutputType.object]
diff --git a/mars/learn/decomposition/_base.py b/mars/learn/decomposition/_base.py
index fc54ed514f..e1d2bad1d1 100644
--- a/mars/learn/decomposition/_base.py
+++ b/mars/learn/decomposition/_base.py
@@ -89,7 +89,7 @@ def get_precision(self, session=None):
         precision = mt.dot(components_, components_.T) / self.noise_variance_
         precision.flat[:: len(precision) + 1] += 1.0 / exp_var_diff
         precision = mt.dot(components_.T, mt.dot(linalg.inv(precision), components_))
-        precision /= -(self.noise_variance_**2)
+        precision /= -(self.noise_variance_ ** 2)
         precision.flat[:: len(precision) + 1] += 1.0 / self.noise_variance_
         precision.execute(session=session)
         return precision
diff --git a/mars/learn/decomposition/_pca.py b/mars/learn/decomposition/_pca.py
index 11314a7556..f63e1f3b32 100644
--- a/mars/learn/decomposition/_pca.py
+++ b/mars/learn/decomposition/_pca.py
@@ -470,7 +470,7 @@ def _fit_full(self, X, n_components, session=None, run_kwargs=None):
         components_ = V
 
         # Get variance explained by singular values
-        explained_variance_ = (S**2) / (n_samples - 1)
+        explained_variance_ = (S ** 2) / (n_samples - 1)
         total_var = explained_variance_.sum()
         explained_variance_ratio_ = explained_variance_ / total_var
         singular_values_ = S.copy()  # Store the singular values.
@@ -575,7 +575,7 @@ def _fit_truncated(self, X, n_components, svd_solver):
         self.n_components_ = n_components
 
         # Get variance explained by singular values
-        self.explained_variance_ = (S**2) / (n_samples - 1)
+        self.explained_variance_ = (S ** 2) / (n_samples - 1)
         total_var = mt.var(X, ddof=1, axis=0)
         self.explained_variance_ratio_ = self.explained_variance_ / total_var.sum()
         self.singular_values_ = S.copy()  # Store the singular values.
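Note on the `_pca.py` hunks above: both spellings compute the same explained-variance quantity; only the power-operator spacing changes. The relation itself can be sanity-checked with plain NumPy — a minimal sketch under assumed shapes, with illustrative variable names that are not part of the Mars API:

    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.rand(20, 5)
    Xc = X - X.mean(axis=0)  # PCA operates on centered data
    U, S, Vt = np.linalg.svd(Xc, full_matrices=False)
    explained_variance = (S ** 2) / (X.shape[0] - 1)
    # equals the per-component variance of the projected scores
    scores = Xc @ Vt.T
    np.testing.assert_allclose(explained_variance, scores.var(axis=0, ddof=1))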
diff --git a/mars/learn/decomposition/tests/test_pca.py b/mars/learn/decomposition/tests/test_pca.py
index a2cf5a9008..626865fa5c 100644
--- a/mars/learn/decomposition/tests/test_pca.py
+++ b/mars/learn/decomposition/tests/test_pca.py
@@ -235,12 +235,12 @@ def test_singular_values(setup):
     X_pca = pca.transform(X)
     X_rpca = rpca.transform(X)
     assert_array_almost_equal(
-        mt.sum(pca.singular_values_**2.0).to_numpy(),
+        mt.sum(pca.singular_values_ ** 2.0).to_numpy(),
         (mt.linalg.norm(X_pca, "fro") ** 2.0).to_numpy(),
         12,
     )
     assert_array_almost_equal(
-        mt.sum(rpca.singular_values_**2.0).to_numpy(),
+        mt.sum(rpca.singular_values_ ** 2.0).to_numpy(),
         (mt.linalg.norm(X_rpca, "fro") ** 2.0).to_numpy(),
         0,
     )
@@ -248,12 +248,12 @@ def test_singular_values(setup):
     # Compare to the 2-norms of the score vectors
     assert_array_almost_equal(
         pca.singular_values_.fetch(),
-        mt.sqrt(mt.sum(X_pca**2.0, axis=0)).to_numpy(),
+        mt.sqrt(mt.sum(X_pca ** 2.0, axis=0)).to_numpy(),
         12,
     )
     assert_array_almost_equal(
         rpca.singular_values_.fetch(),
-        mt.sqrt(mt.sum(X_rpca**2.0, axis=0)).to_numpy(),
+        mt.sqrt(mt.sum(X_rpca ** 2.0, axis=0)).to_numpy(),
         2,
     )
@@ -268,7 +268,7 @@ def test_singular_values(setup):
     rpca = PCA(n_components=3, svd_solver="randomized", random_state=rng)
     X_pca = pca.fit_transform(X)
 
-    X_pca /= mt.sqrt(mt.sum(X_pca**2.0, axis=0))
+    X_pca /= mt.sqrt(mt.sum(X_pca ** 2.0, axis=0))
     X_pca[:, 0] *= 3.142
     X_pca[:, 1] *= 2.718
 
@@ -289,7 +289,7 @@ def test_pca_check_projection(setup):
 
     for solver in solver_list:
         Yt = PCA(n_components=2, svd_solver=solver).fit(X).transform(Xt)
-        Yt /= mt.sqrt((Yt**2).sum())
+        Yt /= mt.sqrt((Yt ** 2).sum())
 
         assert_almost_equal(mt.abs(Yt[0][0]).to_numpy(), 1.0, 1)
 
@@ -378,7 +378,7 @@ def test_randomized_pca_check_projection(setup):
         .fit(X)
         .transform(Xt)
     )
-    Yt /= np.sqrt((Yt**2).sum())
+    Yt /= np.sqrt((Yt ** 2).sum())
 
     assert_almost_equal(mt.abs(Yt[0][0]).to_numpy(), 1.0, 1)
 
@@ -524,7 +524,7 @@ def test_pca_score(setup):
         pca = PCA(n_components=2, svd_solver=solver)
         pca.fit(X)
         ll1 = pca.score(X)
-        h = -0.5 * mt.log(2 * mt.pi * mt.exp(1) * 0.1**2) * p
+        h = -0.5 * mt.log(2 * mt.pi * mt.exp(1) * 0.1 ** 2) * p
         np.testing.assert_almost_equal((ll1 / h).to_numpy(), 1, 0)
 
diff --git a/mars/learn/decomposition/tests/test_truncated_svd.py b/mars/learn/decomposition/tests/test_truncated_svd.py
index f84b546ad9..74aaedcda1 100644
--- a/mars/learn/decomposition/tests/test_truncated_svd.py
+++ b/mars/learn/decomposition/tests/test_truncated_svd.py
@@ -158,7 +158,7 @@ def test_singular_values(setup):
     rpca = TruncatedSVD(n_components=3, algorithm="randomized", random_state=rng)
     X_rpca = rpca.fit_transform(X)
 
-    X_rpca /= mt.sqrt(mt.sum(X_rpca**2.0, axis=0))
+    X_rpca /= mt.sqrt(mt.sum(X_rpca ** 2.0, axis=0))
     X_rpca[:, 0] *= 3.142
     X_rpca[:, 1] *= 2.718
 
diff --git a/mars/learn/ensemble/_bagging.py b/mars/learn/ensemble/_bagging.py
index fd3c77a796..7d10e5e981 100644
--- a/mars/learn/ensemble/_bagging.py
+++ b/mars/learn/ensemble/_bagging.py
@@ -529,12 +529,7 @@ def _execute_map(cls, ctx, op: "BaggingSample"):
 
         for (
             reducer_id,
-            (
-                samples,
-                labels,
-                weights,
-                feature_idx_array,
-            ),
+            (samples, labels, weights, feature_idx_array,),
         ) in result_store.items():
             ctx[out_samples.key, (reducer_id, 0)] = (
                 ctx.get_current_chunk().index,
@@ -1463,8 +1458,7 @@ def _predict_proba(self, X):
         check_is_fitted(self)
         X = convert_to_tensor_or_dataframe(X)
         predict_op = BaggingPredictionOperand(
-            n_classes=self.n_classes_,
-            prediction_type=PredictionType.PROBABILITY,
+            n_classes=self.n_classes_, prediction_type=PredictionType.PROBABILITY,
         )
         return predict_op(X, self.estimators_, self.estimator_features_)
@@ -1540,8 +1534,7 @@ def predict_log_proba(self, X, session=None, run_kwargs=None):
         check_is_fitted(self)
         X = convert_to_tensor_or_dataframe(X)
         predict_op = BaggingPredictionOperand(
-            n_classes=self.n_classes_,
-            prediction_type=PredictionType.LOG_PROBABILITY,
+            n_classes=self.n_classes_, prediction_type=PredictionType.LOG_PROBABILITY,
         )
         probas = predict_op(X, self.estimators_, self.estimator_features_)
         return execute(probas, session=session, **(run_kwargs or dict()))
@@ -1567,8 +1560,7 @@ def decision_function(self, X, session=None, run_kwargs=None):
         check_is_fitted(self)
         X = convert_to_tensor_or_dataframe(X)
         predict_op = BaggingPredictionOperand(
-            n_classes=self.n_classes_,
-            prediction_type=PredictionType.DECISION_FUNCTION,
+            n_classes=self.n_classes_, prediction_type=PredictionType.DECISION_FUNCTION,
         )
         result = predict_op(X, self.estimators_, self.estimator_features_)
         return execute(result, session=session, **(run_kwargs or dict()))
diff --git a/mars/learn/ensemble/_blockwise.py b/mars/learn/ensemble/_blockwise.py
index 014b547d1b..c87e44ba45 100644
--- a/mars/learn/ensemble/_blockwise.py
+++ b/mars/learn/ensemble/_blockwise.py
@@ -79,10 +79,7 @@ def tile(cls, op: "BlockwiseEnsembleFit"):
         out_chunks = []
         for i, _ in enumerate(x_split):
             chunk_op = op.copy().reset_key()
-            out_chunk = chunk_op.new_chunk(
-                [X.cix[i, 0], y.cix[(i,)]],
-                index=(i,),
-            )
+            out_chunk = chunk_op.new_chunk([X.cix[i, 0], y.cix[(i,)]], index=(i,),)
             out_chunks.append(out_chunk)
 
         params = out.params.copy()
diff --git a/mars/learn/ensemble/tests/test_bagging.py b/mars/learn/ensemble/tests/test_bagging.py
index b613439663..5ae9fa7e59 100644
--- a/mars/learn/ensemble/tests/test_bagging.py
+++ b/mars/learn/ensemble/tests/test_bagging.py
@@ -204,10 +204,7 @@ def test_bagging_sample_reindex(
 
 @pytest.mark.parametrize(
     "use_dataframe, max_samples, max_features, with_weights, base_estimator_cls",
-    [
-        (False, 10, 0.5, False, LogisticRegression),
-        (True, 10, 1.0, True, SVC),
-    ],
+    [(False, 10, 0.5, False, LogisticRegression), (True, 10, 1.0, True, SVC),],
 )
 def test_bagging_classifier(
     setup, use_dataframe, max_samples, max_features, with_weights, base_estimator_cls
@@ -281,10 +278,7 @@ def test_bagging_classifier(
 
 @pytest.mark.parametrize(
     "use_dataframe, max_samples, max_features, with_weights",
-    [
-        (False, 10, 0.5, False),
-        (True, 10, 1.0, True),
-    ],
+    [(False, 10, 0.5, False), (True, 10, 1.0, True),],
 )
 def test_bagging_regressor(
     setup, use_dataframe, max_samples, max_features, with_weights
diff --git a/mars/learn/ensemble/tests/test_blockwise.py b/mars/learn/ensemble/tests/test_blockwise.py
index 3fbf1d57c3..322a72b5cd 100644
--- a/mars/learn/ensemble/tests/test_blockwise.py
+++ b/mars/learn/ensemble/tests/test_blockwise.py
@@ -66,9 +66,7 @@ def test_blockwise_voting_classifier_hard(setup, fit_X, fit_y, predict_X, predict_y):
 )
 def test_blockwise_voting_classifier_soft(setup, fit_X, fit_y, predict_X, predict_y):
     clf = BlockwiseVotingClassifier(
-        LogisticRegression(solver="lbfgs"),
-        voting="soft",
-        classes=[0, 1],
+        LogisticRegression(solver="lbfgs"), voting="soft", classes=[0, 1],
     )
     clf.fit(fit_X, fit_y)
     estimators = clf.estimators_.fetch()
diff --git a/mars/learn/linear_model/_base.py b/mars/learn/linear_model/_base.py
index 89a68f19a4..88c3ccd4a6 100644
--- a/mars/learn/linear_model/_base.py
+++ b/mars/learn/linear_model/_base.py
@@ -230,12 +230,7 @@ class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel):
 
     @_deprecate_positional_args
     def __init__(
-        self,
-        *,
-        fit_intercept=True,
-        normalize=False,
-        copy_X=True,
-        positive=False,
+        self, *, fit_intercept=True, normalize=False, copy_X=True, positive=False,
     ):
         self.fit_intercept = fit_intercept
         self.normalize = normalize
diff --git a/mars/learn/metrics/_classification.py b/mars/learn/metrics/_classification.py
index ef18f68a91..4e4f34fa51 100644
--- a/mars/learn/metrics/_classification.py
+++ b/mars/learn/metrics/_classification.py
@@ -48,7 +48,7 @@ def __init__(
         normalize=None,
         sample_weight=None,
         type_true=None,
-        **kw
+        **kw,
     ):
         super().__init__(
             _y_true=y_true,
@@ -56,7 +56,7 @@ def __init__(
             _normalize=normalize,
             _sample_weight=sample_weight,
             _type_true=type_true,
-            **kw
+            **kw,
         )
         self.output_types = [OutputType.tensor]
 
@@ -347,7 +347,7 @@ def multilabel_confusion_matrix(
     labels=None,
     samplewise=False,
     session=None,
-    run_kwargs=None
+    run_kwargs=None,
 ):
     """
     Compute a confusion matrix for each class or sample.
@@ -699,7 +699,7 @@ def precision_recall_fscore_support(
     sample_weight=None,
     zero_division="warn",
     session=None,
-    run_kwargs=None
+    run_kwargs=None,
 ):
     """Compute precision, recall, F-measure and support for each class
 
@@ -888,7 +888,7 @@ def precision_recall_fscore_support(
     execute(true_sum, **exec_kw)
 
     # Finally, we have all our sufficient statistics. Divide! #
-    beta2 = beta**2
+    beta2 = beta ** 2
 
     # Divide, and on zero-division, set scores and/or warn according to
     # zero_division:
@@ -960,7 +960,7 @@ def precision_score(
     pos_label=1,
     average="binary",
     sample_weight=None,
-    zero_division="warn"
+    zero_division="warn",
 ):
     """Compute the precision
 
@@ -1086,7 +1086,7 @@ def recall_score(
     pos_label=1,
     average="binary",
     sample_weight=None,
-    zero_division="warn"
+    zero_division="warn",
 ):
     """Compute the recall
 
@@ -1211,7 +1211,7 @@ def f1_score(
     pos_label=1,
     average="binary",
     sample_weight=None,
-    zero_division="warn"
+    zero_division="warn",
 ):
     """Compute the F1 score, also known as balanced F-score or F-measure
 
@@ -1347,7 +1347,7 @@ def fbeta_score(
     pos_label=1,
     average="binary",
     sample_weight=None,
-    zero_division="warn"
+    zero_division="warn",
 ):
     """Compute the F-beta score
 
@@ -1474,6 +1474,7 @@ def fbeta_score(
     )
     return f
 
+
 def classification_report(
     y_true,
     y_pred,
@@ -1696,7 +1697,6 @@ class 2       1.00      0.67      0.80         3
     else:
         return report
 
-
     # @validate_params(
     #     {
     #         "y_true": ["array-like", "sparse matrix"],
@@ -1704,4 +1704,4 @@ class 2       1.00      0.67      0.80         3
     #         "sample_weight": ["array-like", None],
    #     },
     #     prefer_skip_nested_validation=True,
-    # )
\ No newline at end of file
+    # )
diff --git a/mars/learn/metrics/_ranking.py b/mars/learn/metrics/_ranking.py
index 3517f12372..b0e4ec4316 100644
--- a/mars/learn/metrics/_ranking.py
+++ b/mars/learn/metrics/_ranking.py
@@ -270,7 +270,7 @@ def _binary_roc_auc_score(
 
     # McClish correction: standardize result to be 0.5 if non-discriminant
    # and 1 if maximal
-    min_area = 0.5 * max_fpr**2
+    min_area = 0.5 * max_fpr ** 2
     max_area = max_fpr
     return 0.5 * (
         1 + (partial_auc.fetch(session=session) - min_area) / (max_area - min_area)
diff --git a/mars/learn/metrics/_scorer.py b/mars/learn/metrics/_scorer.py
index 1447b87978..ea26bd182e 100644
--- a/mars/learn/metrics/_scorer.py
+++ b/mars/learn/metrics/_scorer.py
@@ -24,11 +24,7 @@
 neg_log_loss_scorer = make_scorer(log_loss, greater_is_better=False, needs_proba=True)
 
 
-SCORERS = dict(
-    r2=r2_score,
-    accuracy=accuracy_score,
-    neg_log_loss=neg_log_loss_scorer,
-)
+SCORERS = dict(r2=r2_score, accuracy=accuracy_score, neg_log_loss=neg_log_loss_scorer,)
 
 
 def get_scorer(score_func: Union[str, Callable], **kwargs) -> Callable:
diff --git a/mars/learn/metrics/pairwise/core.py b/mars/learn/metrics/pairwise/core.py
index 406e36705e..a717a45a06 100644
--- a/mars/learn/metrics/pairwise/core.py
+++ b/mars/learn/metrics/pairwise/core.py
@@ -110,10 +110,7 @@ def _tile_chunks(cls, op, x, y):
                 chunk_inputs = [x.cix[xi, 0], y.cix[yi, 0]]
                 out_chunk = chunk_op.new_chunk(
                     chunk_inputs,
-                    shape=(
-                        chunk_inputs[0].shape[0],
-                        chunk_inputs[1].shape[0],
-                    ),
+                    shape=(chunk_inputs[0].shape[0], chunk_inputs[1].shape[0],),
                     order=out.order,
                     index=idx,
                 )
diff --git a/mars/learn/metrics/pairwise/tests/test_euclidean_distances.py b/mars/learn/metrics/pairwise/tests/test_euclidean_distances.py
index 2e1fa2a632..991f65e9e6 100644
--- a/mars/learn/metrics/pairwise/tests/test_euclidean_distances.py
+++ b/mars/learn/metrics/pairwise/tests/test_euclidean_distances.py
@@ -84,13 +84,13 @@ def test_euclidean_distances_execution(setup):
     )
     np.testing.assert_almost_equal(result, expected)
 
-    x_sq = (x**2).astype(np.float32)
-    y_sq = (y**2).astype(np.float32)
+    x_sq = (x ** 2).astype(np.float32)
+    y_sq = (y ** 2).astype(np.float32)
 
     distance = euclidean_distances(x_sq, y_sq, squared=True)
 
-    x_raw_sq = (raw_x**2).astype(np.float32)
-    y_raw_sq = (raw_y**2).astype(np.float32)
+    x_raw_sq = (raw_x ** 2).astype(np.float32)
+    y_raw_sq = (raw_y ** 2).astype(np.float32)
 
     result = distance.execute().fetch()
     expected = sk_euclidean_distances(x_raw_sq, y_raw_sq, squared=True)
diff --git a/mars/learn/neighbors/_faiss.py b/mars/learn/neighbors/_faiss.py
index ff17cd68f2..9bbac5cbe9 100644
--- a/mars/learn/neighbors/_faiss.py
+++ b/mars/learn/neighbors/_faiss.py
@@ -486,7 +486,7 @@ def _gen_index_string_and_sample_count(
     size, dim = shape
     memory_require = _get_memory_require(memory_require)
 
-    if accuracy or size < 10**5:
+    if accuracy or size < 10 ** 5:
         # Flat is the only index that guarantees exact results
         # no need to train, thus sample count is None
         return "Flat", None
@@ -518,7 +518,7 @@ def _gen_index_string_and_sample_count(
         raise ValueError("unknown memory require")
 
     # now choose the clustering options
-    if size < 10**6 or (size < 10**7 and gpu):
+    if size < 10 ** 6 or (size < 10 ** 7 and gpu):
         # < 1M, or <10M but need GPU
         k = kw.get("k", 5 * int(np.sqrt(size)))
         if k < 4 * int(np.sqrt(size)) or k > 16 * int(np.sqrt(size)):
@@ -529,13 +529,13 @@ def _gen_index_string_and_sample_count(
         if n_sample is None:
             # 30 * k - 256 * k
             n_sample = min(30 * k, size)
-    elif size < 10**7 and not gpu:
+    elif size < 10 ** 7 and not gpu:
         # 1M - 10M
         index_str = basement.format("IVF65536_HNSW32")
         if n_sample is None:
             # between 30 * 65536 and 256 * 65536
             n_sample = 32 * 65536
-    elif size < 10**8:
+    elif size < 10 ** 8:
         index_str = basement.format("IVF65536_HNSW32")
         n_sample = 64 * 65536 if n_sample is None else n_sample
     else:
diff --git a/mars/learn/neighbors/tests/test_faiss.py b/mars/learn/neighbors/tests/test_faiss.py
index afc47f69de..673d9e78d8 100644
--- a/mars/learn/neighbors/tests/test_faiss.py
+++ b/mars/learn/neighbors/tests/test_faiss.py
@@ -169,54 +169,54 @@ def test_gen_index_string_and_sample_count(setup):
     d = 32
 
     # accuracy=True, could be Flat only
-    ret = _gen_index_string_and_sample_count((10**9, d), None, True, "minimum")
+    ret = _gen_index_string_and_sample_count((10 ** 9, d), None, True, "minimum")
     assert ret == ("Flat", None)
 
     # no memory concern
-    ret = _gen_index_string_and_sample_count((10**5, d), None, False, "maximum")
+    ret = _gen_index_string_and_sample_count((10 ** 5, d), None, False, "maximum")
     assert ret == ("HNSW32", None)
     index = faiss.index_factory(d, ret[0])
     assert index.is_trained is True
 
     # memory concern not much
-    ret = _gen_index_string_and_sample_count((10**5, d), None, False, "high")
+    ret = _gen_index_string_and_sample_count((10 ** 5, d), None, False, "high")
     assert ret == ("IVF1580,Flat", 47400)
     index = faiss.index_factory(d, ret[0])
     assert index.is_trained is False
 
     # memory quite important
-    ret = _gen_index_string_and_sample_count((5 * 10**6, d), None, False, "low")
+    ret = _gen_index_string_and_sample_count((5 * 10 ** 6, d), None, False, "low")
     assert ret == ("PCAR16,IVF65536_HNSW32,SQ8", 32 * 65536)
     index = faiss.index_factory(d, ret[0])
     assert index.is_trained is False
 
     # memory very important
-    ret = _gen_index_string_and_sample_count((10**8, d), None, False, "minimum")
+    ret = _gen_index_string_and_sample_count((10 ** 8, d), None, False, "minimum")
     assert ret == ("OPQ16_32,IVF1048576_HNSW32,PQ16", 64 * 65536)
     index = faiss.index_factory(d, ret[0])
     assert index.is_trained is False
 
-    ret = _gen_index_string_and_sample_count((10**10, d), None, False, "low")
+    ret = _gen_index_string_and_sample_count((10 ** 10, d), None, False, "low")
     assert ret == ("PCAR16,IVF1048576_HNSW32,SQ8", 64 * 65536)
     index = faiss.index_factory(d, ret[0])
     assert index.is_trained is False
 
     with pytest.raises(ValueError):
         # M > 64 raise error
-        _gen_index_string_and_sample_count((10**5, d), None, False, "maximum", M=128)
+        _gen_index_string_and_sample_count((10 ** 5, d), None, False, "maximum", M=128)
 
     with pytest.raises(ValueError):
         # M > 64
-        _gen_index_string_and_sample_count((10**5, d), None, False, "minimum", M=128)
+        _gen_index_string_and_sample_count((10 ** 5, d), None, False, "minimum", M=128)
 
     with pytest.raises(ValueError):
         # dim should be multiple of M
         _gen_index_string_and_sample_count(
-            (10**5, d), None, False, "minimum", M=16, dim=17
+            (10 ** 5, d), None, False, "minimum", M=16, dim=17
         )
 
     with pytest.raises(ValueError):
-        _gen_index_string_and_sample_count((10**5, d), None, False, "low", k=5)
+        _gen_index_string_and_sample_count((10 ** 5, d), None, False, "low", k=5)
 
 
 @pytest.mark.skipif(faiss is None, reason="faiss not installed")
diff --git a/mars/learn/preprocessing/normalize.py b/mars/learn/preprocessing/normalize.py
index d7381cb0ce..778373c5a3 100644
--- a/mars/learn/preprocessing/normalize.py
+++ b/mars/learn/preprocessing/normalize.py
@@ -214,7 +214,7 @@ def tile(cls, op):
             if norm == "l1":
                 norms = mt.abs(x).sum(axis=axis)
             elif norm == "l2":
-                norms = mt.sqrt((x**2).sum(axis=axis))
+                norms = mt.sqrt((x ** 2).sum(axis=axis))
             else:
                 assert norm == "max"
                 # sparse.max will still be a sparse,
@@ -280,7 +280,7 @@ def execute(cls, ctx, op):
                 if norm == "l1":
                     norms = xp.abs(x).sum(axis=1)
                 elif norm == "l2":
-                    norms = xp.sqrt((x**2).sum(axis=1))
+                    norms = xp.sqrt((x ** 2).sum(axis=1))
                 else:
                     norms = xp.max(x, axis=1)
                     if issparse(norms):
diff --git a/mars/learn/proxima/simple_index/builder.py b/mars/learn/proxima/simple_index/builder.py
index b835f64e99..1e220196bf 100644
--- a/mars/learn/proxima/simple_index/builder.py
+++ b/mars/learn/proxima/simple_index/builder.py
@@ -49,7 +49,7 @@
 
 logger = logging.getLogger(__name__)
 
-DEFAULT_INDEX_SIZE = 5 * 10**6
+DEFAULT_INDEX_SIZE = 5 * 10 ** 6
 
 
 class ProximaBuilder(LearnOperand, LearnOperandMixin):
@@ -338,7 +338,7 @@ def write_index():
             with fs.open(out_path, "wb") as out_f:
                 with open(path, "rb") as in_f:
                     # 128M
-                    chunk_bytes = 128 * 1024**2
+                    chunk_bytes = 128 * 1024 ** 2
while True: data = in_f.read(chunk_bytes) if data: diff --git a/mars/learn/proxima/simple_index/searcher.py b/mars/learn/proxima/simple_index/searcher.py index abcca6937a..867f894900 100644 --- a/mars/learn/proxima/simple_index/searcher.py +++ b/mars/learn/proxima/simple_index/searcher.py @@ -376,7 +376,7 @@ def _execute_download(cls, ctx, op: "ProximaSearcher"): with open(local_path, "wb") as out_f: with fs.open(index_path, "rb") as in_f: # 32M - chunk_bytes = 32 * 1024**2 + chunk_bytes = 32 * 1024 ** 2 while True: data = in_f.read(chunk_bytes) if data: diff --git a/mars/learn/utils/core.py b/mars/learn/utils/core.py index 82d3218dbb..f507fcc8d6 100644 --- a/mars/learn/utils/core.py +++ b/mars/learn/utils/core.py @@ -122,7 +122,7 @@ def get_chunk_n_rows(row_bytes, max_n_rows=None, working_memory=None): working_memory = 1024 if isinstance(working_memory, int): - working_memory *= 2**20 + working_memory *= 2 ** 20 else: working_memory = parse_readable_size(working_memory)[0] @@ -133,7 +133,7 @@ def get_chunk_n_rows(row_bytes, max_n_rows=None, working_memory=None): warnings.warn( "Could not adhere to working_memory config. " "Currently %.0fMiB, %.0fMiB required." - % (working_memory, np.ceil(row_bytes * 2**-20)) + % (working_memory, np.ceil(row_bytes * 2 ** -20)) ) chunk_n_rows = 1 return chunk_n_rows diff --git a/mars/lib/aio/lru.py b/mars/lib/aio/lru.py index 46f8ed2327..2fa6c7aa7f 100644 --- a/mars/lib/aio/lru.py +++ b/mars/lib/aio/lru.py @@ -125,12 +125,7 @@ def _close_waited(wrapped, _): def _cache_info(wrapped, maxsize): - return _CacheInfo( - wrapped.hits, - wrapped.misses, - maxsize, - len(wrapped._cache), - ) + return _CacheInfo(wrapped.hits, wrapped.misses, maxsize, len(wrapped._cache),) def __cache_touch(wrapped, key): @@ -151,11 +146,7 @@ def _cache_miss(wrapped, key): def alru_cache( - fn=None, - maxsize=128, - typed=False, - *, - cache_exceptions=True, + fn=None, maxsize=128, typed=False, *, cache_exceptions=True, ): def wrapper(fn): _origin = unpartial(fn) diff --git a/mars/lib/bloom_filter.py b/mars/lib/bloom_filter.py index 13fb1be7b2..a6d6492140 100644 --- a/mars/lib/bloom_filter.py +++ b/mars/lib/bloom_filter.py @@ -55,7 +55,7 @@ class Mmap_backend(object): Please note that this has only been tested on Linux so far. """ - effs = 2**8 - 1 + effs = 2 ** 8 - 1 def __init__(self, num_bits, filename): if not HAVE_MMAP: @@ -119,7 +119,7 @@ def close(self): class File_seek_backend(object): """Backend storage for our "array of bits" using a file in which we seek""" - effs = 2**8 - 1 + effs = 2 ** 8 - 1 def __init__(self, num_bits, filename): self.num_bits = num_bits @@ -201,7 +201,7 @@ class Array_then_file_seek_backend(object): RAM. On close, we write from RAM to the file. """ - effs = 2**8 - 1 + effs = 2 ** 8 - 1 def __init__(self, num_bits, filename, max_bytes_in_memory): self.num_bits = num_bits @@ -223,7 +223,7 @@ def __init__(self, num_bits, filename, max_bytes_in_memory): os.lseek(self.file_, 0, os.SEEK_SET) offset = 0 - intended_block_len = 2**17 + intended_block_len = 2 ** 17 while True: if offset + intended_block_len < self.bytes_in_memory: block = os.read(self.file_, intended_block_len) @@ -315,7 +315,7 @@ class Array_backend(object): # Note that this has now been split out into a bits_mod for the benefit of # other projects. 
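The bloom-filter backends touched above (mmap, file-seek, array-then-file) all expose the same small bit-array contract over different storage; `effs` is simply an all-ones byte (2 ** 8 - 1) used for masking. A minimal in-memory sketch of that contract, assuming a bytearray store (the real backends differ in persistence, not in the bit arithmetic):

    class BitArraySketch:
        effs = 2 ** 8 - 1  # one fully-set byte, mirroring the constant above

        def __init__(self, num_bits):
            self.num_bits = num_bits
            self.store = bytearray((num_bits + 7) // 8)

        def set_bit(self, bitno):
            self.store[bitno // 8] |= 1 << (bitno % 8)

        def test_bit(self, bitno):
            return bool(self.store[bitno // 8] & (1 << (bitno % 8)))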
- effs = 2**32 - 1 + effs = 2 ** 32 - 1 def __init__(self, num_bits): self.num_bits = num_bits @@ -377,8 +377,8 @@ def get_bitno_seed_rnd(bloom_filter, key): yield bitno % bloom_filter.num_bits_m -MERSENNES1 = [2**x - 1 for x in [17, 31, 127]] -MERSENNES2 = [2**x - 1 for x in [19, 67, 257]] +MERSENNES1 = [2 ** x - 1 for x in [17, 31, 127]] +MERSENNES2 = [2 ** x - 1 for x in [19, 67, 257]] def simple_hash(int_list, prime1, prime2, prime3): @@ -488,9 +488,7 @@ def __init__( self.backend = Mmap_backend(self.num_bits_m, filename[0]) else: self.backend = Array_then_file_seek_backend( - self.num_bits_m, - filename[0], - filename[1], + self.num_bits_m, filename[0], filename[1], ) else: if start_fresh: @@ -507,11 +505,7 @@ def __init__( def __repr__(self): return ( "BloomFilter(ideal_num_elements_n=%d, error_rate_p=%f, " + "num_bits_m=%d)" - ) % ( - self.ideal_num_elements_n, - self.error_rate_p, - self.num_bits_m, - ) + ) % (self.ideal_num_elements_n, self.error_rate_p, self.num_bits_m,) def add(self, key): """Add an element to the filter""" diff --git a/mars/lib/filesystem/tests/test_s3.py b/mars/lib/filesystem/tests/test_s3.py index 14d63f19dc..edd841b86b 100644 --- a/mars/lib/filesystem/tests/test_s3.py +++ b/mars/lib/filesystem/tests/test_s3.py @@ -32,6 +32,7 @@ def __init__(self, **kwargs): super().__init__(**kwargs) raise KwArgsException(kwargs) + else: TestS3FileSystem = None diff --git a/mars/lib/sparse/__init__.py b/mars/lib/sparse/__init__.py index 8ecb44980b..4583417cf9 100644 --- a/mars/lib/sparse/__init__.py +++ b/mars/lib/sparse/__init__.py @@ -90,7 +90,7 @@ def floor_divide(a, b, **_): def power(a, b, **_): try: - return a**b + return a ** b except TypeError: if hasattr(b, "__rpow__"): return b.__rpow__(a) diff --git a/mars/lib/sparse/array.py b/mars/lib/sparse/array.py index ddd71d383a..265601a7fb 100644 --- a/mars/lib/sparse/array.py +++ b/mars/lib/sparse/array.py @@ -1497,7 +1497,7 @@ def ldexp(self, other): if issparse(naked_other): naked_other = other.toarray() - return SparseNDArray(self.spmatrix.multiply(2**naked_other)) + return SparseNDArray(self.spmatrix.multiply(2 ** naked_other)) def frexp(self, **kw): xp = get_array_module(self.spmatrix) diff --git a/mars/lib/sparse/tests/test_sparse.py b/mars/lib/sparse/tests/test_sparse.py index c0055115ad..7b1dba2500 100644 --- a/mars/lib/sparse/tests/test_sparse.py +++ b/mars/lib/sparse/tests/test_sparse.py @@ -192,21 +192,21 @@ def test_sparse_power(): s1 = SparseNDArray(s1_data) s2 = SparseNDArray(s2_data) - assert_array_equal(s1**s2, s1.toarray() ** s2.toarray()) - assert_array_equal(s1**d1, s1.toarray() ** d1) - assert_array_equal(d1**s1, d1 ** s1.toarray()) - assert_array_equal(s1**2, s1_data.power(2)) - assert_array_equal(2**s1, 2 ** s1.toarray()) + assert_array_equal(s1 ** s2, s1.toarray() ** s2.toarray()) + assert_array_equal(s1 ** d1, s1.toarray() ** d1) + assert_array_equal(d1 ** s1, d1 ** s1.toarray()) + assert_array_equal(s1 ** 2, s1_data.power(2)) + assert_array_equal(2 ** s1, 2 ** s1.toarray()) # test sparse vector v = SparseNDArray(v1, shape=(3,)) - assert_array_equal(v**v, v1_data**v1_data) - assert_array_equal(v**d1, v1_data**d1) - assert_array_equal(d1**v, d1**v1_data) - r = sps.csr_matrix(((v1.data**1), v1.indices, v1.indptr), v1.shape) - assert_array_equal(v**1, r.toarray().reshape(3)) - r = sps.csr_matrix(((1**v1.data), v1.indices, v1.indptr), v1.shape) - assert_array_equal(1**v, r.toarray().reshape(3)) + assert_array_equal(v ** v, v1_data ** v1_data) + assert_array_equal(v ** d1, v1_data ** d1) + 
assert_array_equal(d1 ** v, d1 ** v1_data) + r = sps.csr_matrix(((v1.data ** 1), v1.indices, v1.indptr), v1.shape) + assert_array_equal(v ** 1, r.toarray().reshape(3)) + r = sps.csr_matrix(((1 ** v1.data), v1.indices, v1.indptr), v1.shape) + assert_array_equal(1 ** v, r.toarray().reshape(3)) def test_sparse_mod(): diff --git a/mars/lib/tblib/__init__.py b/mars/lib/tblib/__init__.py index 7e717b43c3..7843a17bb5 100644 --- a/mars/lib/tblib/__init__.py +++ b/mars/lib/tblib/__init__.py @@ -16,11 +16,13 @@ if not tb_set_next and not tproxy: raise ImportError("Cannot use tblib. Runtime not supported.") -__version__ = '1.7.0' -__all__ = 'Traceback', 'TracebackParseError', 'Frame', 'Code' +__version__ = "1.7.0" +__all__ = "Traceback", "TracebackParseError", "Frame", "Code" PY3 = sys.version_info[0] == 3 -FRAME_RE = re.compile(r'^\s*File "(?P.+)", line (?P\d+)(, in (?P.+))?$') +FRAME_RE = re.compile( + r'^\s*File "(?P.+)", line (?P\d+)(, in (?P.+))?$' +) class _AttrDict(dict): @@ -46,6 +48,7 @@ class Code(object): """ Class that replicates just enough of the builtin Code object to enable serialization and traceback rendering. """ + co_code = None def __init__(self, code): @@ -64,7 +67,7 @@ def __tproxy__(self, operation, *args, **kwargs): """ Necessary for PyPy's tproxy. """ - if operation in ('__getattribute__', '__getattr__'): + if operation in ("__getattribute__", "__getattr__"): return getattr(self, args[0]) else: return getattr(self, operation)(*args, **kwargs) @@ -74,12 +77,11 @@ class Frame(object): """ Class that replicates just enough of the builtin Frame object to enable serialization and traceback rendering. """ + def __init__(self, frame): self.f_locals = {} self.f_globals = { - k: v - for k, v in frame.f_globals.items() - if k in ("__file__", "__name__") + k: v for k, v in frame.f_globals.items() if k in ("__file__", "__name__") } self.f_code = Code(frame.f_code) self.f_lineno = frame.f_lineno @@ -97,8 +99,8 @@ def __tproxy__(self, operation, *args, **kwargs): """ Necessary for PyPy's tproxy. """ - if operation in ('__getattribute__', '__getattr__'): - if args[0] == 'f_code': + if operation in ("__getattribute__", "__getattr__"): + if args[0] == "f_code": return tproxy(CodeType, self.f_code.__tproxy__) else: return getattr(self, args[0]) @@ -110,6 +112,7 @@ class Traceback(object): """ Class that wraps builtin Traceback objects. 
""" + tb_next = None def __init__(self, tb): @@ -143,27 +146,54 @@ def as_traceback(self): tb = None while current: f_code = current.tb_frame.f_code - code = compile('\n' * (current.tb_lineno - 1) + 'raise __traceback_maker', current.tb_frame.f_code.co_filename, 'exec') + code = compile( + "\n" * (current.tb_lineno - 1) + "raise __traceback_maker", + current.tb_frame.f_code.co_filename, + "exec", + ) if hasattr(code, "replace"): # Python 3.8 and newer - code = code.replace(co_argcount=0, - co_filename=f_code.co_filename, co_name=f_code.co_name, - co_freevars=(), co_cellvars=()) + code = code.replace( + co_argcount=0, + co_filename=f_code.co_filename, + co_name=f_code.co_name, + co_freevars=(), + co_cellvars=(), + ) elif PY3: code = CodeType( - 0, code.co_kwonlyargcount, - code.co_nlocals, code.co_stacksize, code.co_flags, - code.co_code, code.co_consts, code.co_names, code.co_varnames, - f_code.co_filename, f_code.co_name, - code.co_firstlineno, code.co_lnotab, (), () + 0, + code.co_kwonlyargcount, + code.co_nlocals, + code.co_stacksize, + code.co_flags, + code.co_code, + code.co_consts, + code.co_names, + code.co_varnames, + f_code.co_filename, + f_code.co_name, + code.co_firstlineno, + code.co_lnotab, + (), + (), ) else: code = CodeType( 0, - code.co_nlocals, code.co_stacksize, code.co_flags, - code.co_code, code.co_consts, code.co_names, code.co_varnames, - f_code.co_filename.encode(), f_code.co_name.encode(), - code.co_firstlineno, code.co_lnotab, (), () + code.co_nlocals, + code.co_stacksize, + code.co_flags, + code.co_code, + code.co_consts, + code.co_names, + code.co_varnames, + f_code.co_filename.encode(), + f_code.co_name.encode(), + code.co_firstlineno, + code.co_lnotab, + (), + (), ) # noinspection PyBroadException @@ -184,6 +214,7 @@ def as_traceback(self): finally: del top_tb del tb + to_traceback = as_traceback # noinspection SpellCheckingInspection @@ -191,10 +222,10 @@ def __tproxy__(self, operation, *args, **kwargs): """ Necessary for PyPy's tproxy. """ - if operation in ('__getattribute__', '__getattr__'): - if args[0] == 'tb_next': + if operation in ("__getattribute__", "__getattr__"): + if args[0] == "tb_next": return self.tb_next and self.tb_next.as_traceback() - elif args[0] == 'tb_frame': + elif args[0] == "tb_frame": return tproxy(FrameType, self.tb_frame.__tproxy__) else: return getattr(self, args[0]) @@ -212,19 +243,20 @@ def as_dict(self): tb_next = self.tb_next.to_dict() code = { - 'co_filename': self.tb_frame.f_code.co_filename, - 'co_name': self.tb_frame.f_code.co_name, + "co_filename": self.tb_frame.f_code.co_filename, + "co_name": self.tb_frame.f_code.co_name, } frame = { - 'f_globals': self.tb_frame.f_globals, - 'f_code': code, - 'f_lineno': self.tb_frame.f_lineno, + "f_globals": self.tb_frame.f_globals, + "f_code": code, + "f_lineno": self.tb_frame.f_lineno, } return { - 'tb_frame': frame, - 'tb_lineno': self.tb_lineno, - 'tb_next': tb_next, + "tb_frame": frame, + "tb_lineno": self.tb_lineno, + "tb_next": tb_next, } + to_dict = as_dict @classmethod @@ -232,25 +264,21 @@ def from_dict(cls, dct): """ Creates an instance from a dictionary with the same structure as ``.as_dict()`` returns. 
""" - if dct['tb_next']: - tb_next = cls.from_dict(dct['tb_next']) + if dct["tb_next"]: + tb_next = cls.from_dict(dct["tb_next"]) else: tb_next = None code = _AttrDict( - co_filename=dct['tb_frame']['f_code']['co_filename'], - co_name=dct['tb_frame']['f_code']['co_name'], + co_filename=dct["tb_frame"]["f_code"]["co_filename"], + co_name=dct["tb_frame"]["f_code"]["co_name"], ) frame = _AttrDict( - f_globals=dct['tb_frame']['f_globals'], + f_globals=dct["tb_frame"]["f_globals"], f_code=code, - f_lineno=dct['tb_frame']['f_lineno'], - ) - tb = _AttrDict( - tb_frame=frame, - tb_lineno=dct['tb_lineno'], - tb_next=tb_next, + f_lineno=dct["tb_frame"]["f_lineno"], ) + tb = _AttrDict(tb_frame=frame, tb_lineno=dct["tb_lineno"], tb_next=tb_next,) return cls(tb) @classmethod @@ -265,13 +293,13 @@ def from_string(cls, string, strict=True): for line in string.splitlines(): line = line.rstrip() if header: - if line == 'Traceback (most recent call last):': + if line == "Traceback (most recent call last):": header = False continue frame_match = FRAME_RE.match(line) if frame_match: frames.append(frame_match.groupdict()) - elif line.startswith(' '): + elif line.startswith(" "): pass elif strict: break # traceback ended @@ -284,11 +312,10 @@ def from_string(cls, string, strict=True): tb_frame=_AttrDict( frame, f_globals=_AttrDict( - __file__=frame['co_filename'], - __name__='?', + __file__=frame["co_filename"], __name__="?", ), f_code=_AttrDict(frame), - f_lineno=int(frame['tb_lineno']), + f_lineno=int(frame["tb_lineno"]), ), tb_next=previous, ) diff --git a/mars/lib/tblib/decorators.py b/mars/lib/tblib/decorators.py index 93bb86d3b7..77778bc971 100644 --- a/mars/lib/tblib/decorators.py +++ b/mars/lib/tblib/decorators.py @@ -29,7 +29,9 @@ def return_exceptions_wrapper(*args, **kwargs): return return_exceptions_wrapper -returns_error = return_errors = returns_errors = return_error # cause I make too many typos +returns_error = ( + return_errors +) = returns_errors = return_error # cause I make too many typos @return_error diff --git a/mars/lib/tblib/pickling_support.py b/mars/lib/tblib/pickling_support.py index cf6e390049..c3aaf5900f 100644 --- a/mars/lib/tblib/pickling_support.py +++ b/mars/lib/tblib/pickling_support.py @@ -19,7 +19,10 @@ def unpickle_traceback(tb_frame, tb_lineno, tb_next): def pickle_traceback(tb): - return unpickle_traceback, (Frame(tb.tb_frame), tb.tb_lineno, tb.tb_next and Traceback(tb.tb_next)) + return ( + unpickle_traceback, + (Frame(tb.tb_frame), tb.tb_lineno, tb.tb_next and Traceback(tb.tb_next)), + ) def unpickle_exception(func, args, cause, tb): diff --git a/mars/metrics/backends/prometheus/prometheus_metric.py b/mars/metrics/backends/prometheus/prometheus_metric.py index e67db29662..96269800d6 100644 --- a/mars/metrics/backends/prometheus/prometheus_metric.py +++ b/mars/metrics/backends/prometheus/prometheus_metric.py @@ -34,10 +34,7 @@ def _init(self): # Prometheus metric name must match the regex `[a-zA-Z_:][a-zA-Z0-9_:]*` # `.` is a common character in metrics, so here replace it with `:` self._name = self._name.replace(".", ":") - self._tag_keys = self._tag_keys + ( - "host", - "pid", - ) + self._tag_keys = self._tag_keys + ("host", "pid",) self._tags = {"host": socket.gethostname(), "pid": os.getpid()} try: self._metric = ( diff --git a/mars/optimization/logical/tileable/column_pruning/column_pruning_rule.py b/mars/optimization/logical/tileable/column_pruning/column_pruning_rule.py index ae0fb7128c..a4657f64da 100644 --- 
a/mars/optimization/logical/tileable/column_pruning/column_pruning_rule.py +++ b/mars/optimization/logical/tileable/column_pruning/column_pruning_rule.py @@ -156,9 +156,7 @@ def _prune_columns(self) -> List[TileableData]: continue # new node init - new_node_op = DataFrameIndex( - col_names=pruned_columns, - ) + new_node_op = DataFrameIndex(col_names=pruned_columns,) new_params = predecessor.params.copy() new_params["shape"] = ( new_params["shape"][0], diff --git a/mars/optimization/logical/tileable/tests/test_arithmetic_query.py b/mars/optimization/logical/tileable/tests/test_arithmetic_query.py index b9dceeac49..86e6a160ed 100644 --- a/mars/optimization/logical/tileable/tests/test_arithmetic_query.py +++ b/mars/optimization/logical/tileable/tests/test_arithmetic_query.py @@ -169,11 +169,7 @@ def test_eval_setitem_to_eval(setup): records = optimize(graph) opt_df3 = records.get_optimization_result(df3.data) assert opt_df3.op.expr == "\n".join( - [ - "`K` = (`A`) * ((1) - (`B`))", - "`L` = (`K`) - (`A`)", - "`M` = (`K`) + (`L`)", - ] + ["`K` = (`A`) * ((1) - (`B`))", "`L` = (`K`) - (`A`)", "`M` = (`K`) + (`L`)",] ) assert len(graph) == 4 assert len([n for n in graph if isinstance(n.op, DataFrameEval)]) == 1 diff --git a/mars/optimization/physical/numexpr.py b/mars/optimization/physical/numexpr.py index 8294b9da55..a4cdfea42b 100644 --- a/mars/optimization/physical/numexpr.py +++ b/mars/optimization/physical/numexpr.py @@ -114,10 +114,7 @@ def _can_fuse(node: ChunkType): def _collect_fuse( - graph: ChunkGraph, - node: ChunkType, - graph_results: Set[ChunkType], - cached_can_fuse, + graph: ChunkGraph, node: ChunkType, graph_results: Set[ChunkType], cached_can_fuse, ): fuse_graph = ChunkGraph() fuse_graph.add_node(node) @@ -227,10 +224,7 @@ def _fuse_nodes(self, fuses: List[_Fuse], fuse_cls): dtype=tail_chunk.dtype, ) fused_chunk = fuse_op.new_chunk( - inputs, - kws=[tail_chunk.params], - _key=tail_chunk.key, - _chunk=tail_chunk, + inputs, kws=[tail_chunk.params], _key=tail_chunk.key, _chunk=tail_chunk, ).data graph.add_node(fused_chunk) diff --git a/mars/oscar/backends/communication/utils.py b/mars/oscar/backends/communication/utils.py index 3add2e4d41..8c189acf13 100644 --- a/mars/oscar/backends/communication/utils.py +++ b/mars/oscar/backends/communication/utils.py @@ -24,7 +24,7 @@ cudf = lazy_import("cudf") rmm = lazy_import("rmm") -CUDA_CHUNK_SIZE = 16 * 1024**2 +CUDA_CHUNK_SIZE = 16 * 1024 ** 2 def _convert_to_cupy_ndarray( diff --git a/mars/oscar/backends/config.py b/mars/oscar/backends/config.py index a3e16a4fe9..4fb16e7271 100644 --- a/mars/oscar/backends/config.py +++ b/mars/oscar/backends/config.py @@ -83,9 +83,7 @@ def get_process_index(self, external_address: str): ) # pragma: no cover def reset_pool_external_address( - self, - process_index: int, - external_address: Union[str, List[str]], + self, process_index: int, external_address: Union[str, List[str]], ): if not isinstance(external_address, list): external_address = [external_address] diff --git a/mars/oscar/backends/mars/tests/test_debug.py b/mars/oscar/backends/mars/tests/test_debug.py index c73836e656..e0f4d72f6d 100644 --- a/mars/oscar/backends/mars/tests/test_debug.py +++ b/mars/oscar/backends/mars/tests/test_debug.py @@ -89,9 +89,7 @@ async def debug_logger(): try: mo.set_debug_options( mo.DebugOptions( - actor_call_timeout=1, - log_unhandled_errors=True, - log_cycle_send=True, + actor_call_timeout=1, log_unhandled_errors=True, log_cycle_send=True, ) ) yield log_file diff --git a/mars/oscar/backends/message.pyi 
b/mars/oscar/backends/message.pyi index 9b035f666e..6a9bf4c0ea 100644 --- a/mars/oscar/backends/message.pyi +++ b/mars/oscar/backends/message.pyi @@ -46,7 +46,6 @@ class _MessageBase: message_id: bytes message_trace: list profiling_context: Any - def __init__( self, message_id: bytes = None, @@ -62,7 +61,6 @@ class ControlMessage(_MessageBase): address: str control_message_type: ControlMessageType content: Any - def __init__( self, message_id: bytes = None, @@ -77,7 +75,6 @@ class ResultMessage(_MessageBase): message_type = MessageType.result result: Any - def __init__( self, message_id: bytes = None, @@ -95,7 +92,6 @@ class ErrorMessage(_MessageBase): error_type: Type error: BaseException traceback: TracebackType - def __init__( self, message_id: bytes = None, @@ -118,7 +114,6 @@ class CreateActorMessage(_MessageBase): kwargs: dict allocate_strategy: Any from_main: bool - def __init__( self, message_id: bytes = None, @@ -137,7 +132,6 @@ class DestroyActorMessage(_MessageBase): actor_ref: ActorRef from_main: bool - def __init__( self, message_id: bytes = None, @@ -151,7 +145,6 @@ class HasActorMessage(_MessageBase): message_type = MessageType.has_actor actor_ref: ActorRef - def __init__( self, message_id: bytes = None, @@ -164,7 +157,6 @@ class ActorRefMessage(_MessageBase): message_type = MessageType.actor_ref actor_ref: ActorRef - def __init__( self, message_id: bytes = None, @@ -178,7 +170,6 @@ class SendMessage(_MessageBase): actor_ref: ActorRef content: Any - def __init__( self, message_id: bytes = None, @@ -197,7 +188,6 @@ class CancelMessage(_MessageBase): address: str cancel_message_id: bytes - def __init__( self, message_id: bytes = None, diff --git a/mars/oscar/backends/ray/communication.py b/mars/oscar/backends/ray/communication.py index 85f2f069be..e636e730e4 100644 --- a/mars/oscar/backends/ray/communication.py +++ b/mars/oscar/backends/ray/communication.py @@ -94,8 +94,9 @@ def __init__(self, message): self.message = message def __reduce__(self): - return _argwrapper_unpickler, ( - serialize(self.message, context={"serializer": "ray"}), + return ( + _argwrapper_unpickler, + (serialize(self.message, context={"serializer": "ray"}),), ) @@ -104,8 +105,7 @@ def _init_ray_serialization_deserialization(): _ray_serialize = ray.serialization.SerializationContext.serialize _ray_deserialize_object = ray.serialization.SerializationContext._deserialize_object serialized_bytes_counter = Metrics.counter( - "mars.channel_serialized_bytes", - "The bytes serialized by mars ray channel.", + "mars.channel_serialized_bytes", "The bytes serialized by mars ray channel.", ) deserialized_bytes_counter = Metrics.counter( "mars.channel_deserialized_bytes", diff --git a/mars/oscar/backends/ray/pool.py b/mars/oscar/backends/ray/pool.py index 820b3757da..65258cd0ac 100644 --- a/mars/oscar/backends/ray/pool.py +++ b/mars/oscar/backends/ray/pool.py @@ -85,9 +85,7 @@ def gen_internal_address( @classmethod def create_sub_pool( - cls, - main_pool_address, - sub_pool_address, + cls, main_pool_address, sub_pool_address, ): pg_name, bundle_index, process_index = process_address_to_placement( sub_pool_address diff --git a/mars/oscar/tests/test_actorcaller.py b/mars/oscar/tests/test_actorcaller.py index c59d677080..bd4e523c08 100644 --- a/mars/oscar/tests/test_actorcaller.py +++ b/mars/oscar/tests/test_actorcaller.py @@ -55,10 +55,7 @@ def __init__(self, id_num): caller = ActorCaller() - router = Router( - external_addresses=["test1"], - local_address="test2", - ) + router = Router(external_addresses=["test1"], 
local_address="test2",) futures = [] for index in range(2): futures.append( diff --git a/mars/oscar/tests/test_batch.py b/mars/oscar/tests/test_batch.py index 3abbec354f..188a23d618 100644 --- a/mars/oscar/tests/test_batch.py +++ b/mars/oscar/tests/test_batch.py @@ -76,8 +76,7 @@ def method(self, args_list, kwargs_list): test_inst = TestClass() test_inst.method.batch( - test_inst.method.delay(20), - test_inst.method.delay(30, 5), + test_inst.method.delay(20), test_inst.method.delay(30, 5), ) assert test_inst.a_list == [20, 30] assert test_inst.b_list == [10, 5] diff --git a/mars/serialization/tests/test_serial.py b/mars/serialization/tests/test_serial.py index 75bc92fcf3..32aa1127f7 100644 --- a/mars/serialization/tests/test_serial.py +++ b/mars/serialization/tests/test_serial.py @@ -178,8 +178,7 @@ def test_arrow(): @pytest.mark.parametrize( - "np_val", - [np.random.rand(100, 100), np.random.rand(100, 100).T], + "np_val", [np.random.rand(100, 100), np.random.rand(100, 100).T], ) @require_cupy def test_cupy(np_val): diff --git a/mars/services/cluster/api/web.py b/mars/services/cluster/api/web.py index 765cae5eef..83d27030ff 100644 --- a/mars/services/cluster/api/web.py +++ b/mars/services/cluster/api/web.py @@ -152,14 +152,7 @@ async def get_node_thread_stacks(self): cluster_api = await self._get_cluster_api() address = self.get_argument("address", "") or None stacks = list(await cluster_api.get_node_thread_stacks(address)) - self.write( - json.dumps( - { - "generate_time": time.time(), - "stacks": stacks, - } - ) - ) + self.write(json.dumps({"generate_time": time.time(), "stacks": stacks,})) web_handlers = {ClusterWebAPIHandler.get_root_pattern(): ClusterWebAPIHandler} diff --git a/mars/services/cluster/gather.py b/mars/services/cluster/gather.py index fe891aa1ec..9cfc52b4a8 100644 --- a/mars/services/cluster/gather.py +++ b/mars/services/cluster/gather.py @@ -125,10 +125,7 @@ def gather_node_env(): mars_resource.cuda_card_stats() ): # pragma: no cover bands[f"gpu-{idx}"] = { - "resources": { - "gpu": 1, - "memory": gpu_card_stat.fb_mem_info.total, - } + "resources": {"gpu": 1, "memory": gpu_card_stat.fb_mem_info.total,} } return node_info diff --git a/mars/services/cluster/tests/test_locator.py b/mars/services/cluster/tests/test_locator.py index 304ccf98b1..5ddaa888aa 100644 --- a/mars/services/cluster/tests/test_locator.py +++ b/mars/services/cluster/tests/test_locator.py @@ -116,8 +116,7 @@ async def test_supervisor_peer_locator(actor_pool, temp_address_file): # test watch nodes changes version, result = await asyncio.wait_for( - locator_ref.watch_supervisors_by_keys(["mock_name"]), - timeout=30, + locator_ref.watch_supervisors_by_keys(["mock_name"]), timeout=30, ) assert result[0] in addresses @@ -177,8 +176,7 @@ async def test_worker_supervisor_locator(actor_pool, temp_address_file): supervisors = await locator_ref.get_supervisors(filter_ready=False) assert supervisors == addresses version, result = await asyncio.wait_for( - locator_ref.watch_supervisors_by_keys(["mock_name"]), - timeout=30, + locator_ref.watch_supervisors_by_keys(["mock_name"]), timeout=30, ) assert result[0] in addresses diff --git a/mars/services/cluster/tests/test_service.py b/mars/services/cluster/tests/test_service.py index 2e7df7a6cf..ec0e5757c7 100644 --- a/mars/services/cluster/tests/test_service.py +++ b/mars/services/cluster/tests/test_service.py @@ -43,10 +43,7 @@ async def test_cluster_service(actor_pools): config = { "services": ["cluster"], - "cluster": { - "backend": "fixed", - "lookup_address": 
sv_pool.external_address, - }, + "cluster": {"backend": "fixed", "lookup_address": sv_pool.external_address,}, } await start_services(NodeRole.SUPERVISOR, config, address=sv_pool.external_address) await start_services(NodeRole.WORKER, config, address=worker_pool.external_address) diff --git a/mars/services/meta/api/oscar.py b/mars/services/meta/api/oscar.py index cfa58ea7e5..1a75181832 100644 --- a/mars/services/meta/api/oscar.py +++ b/mars/services/meta/api/oscar.py @@ -307,8 +307,8 @@ async def create(cls, session_id: str, address: str) -> "WorkerMetaAPI": worker_meta_store_manager_ref = await mo.actor_ref( uid=WorkerMetaStoreManagerActor.default_uid(), address=address ) - worker_meta_store_ref = ( - await worker_meta_store_manager_ref.new_session_meta_store(session_id) + worker_meta_store_ref = await worker_meta_store_manager_ref.new_session_meta_store( + session_id ) return WorkerMetaAPI(session_id, worker_meta_store_ref) diff --git a/mars/services/meta/tests/test_api.py b/mars/services/meta/tests/test_api.py index d3be88f513..bba248c9dc 100644 --- a/mars/services/meta/tests/test_api.py +++ b/mars/services/meta/tests/test_api.py @@ -139,14 +139,9 @@ async def test_meta_web_api(): async with pool: config = { "services": ["cluster", "session", "meta", "web"], - "cluster": { - "backend": "fixed", - "lookup_address": pool.external_address, - }, + "cluster": {"backend": "fixed", "lookup_address": pool.external_address,}, "meta": {"store": "dict"}, - "web": { - "port": web_port, - }, + "web": {"port": web_port,}, } await start_services(NodeRole.SUPERVISOR, config, address=pool.external_address) diff --git a/mars/services/meta/tests/test_service.py b/mars/services/meta/tests/test_service.py index d0f0978afb..3a9fb121e8 100644 --- a/mars/services/meta/tests/test_service.py +++ b/mars/services/meta/tests/test_service.py @@ -29,10 +29,7 @@ async def test_meta_service(): async with pool, worker_pool: config = { "services": ["cluster", "session", "meta"], - "cluster": { - "backend": "fixed", - "lookup_address": pool.external_address, - }, + "cluster": {"backend": "fixed", "lookup_address": pool.external_address,}, "meta": {"store": "dict"}, } await start_services(NodeRole.SUPERVISOR, config, address=pool.external_address) diff --git a/mars/services/scheduling/api/web.py b/mars/services/scheduling/api/web.py index 552c434e24..e48a717ab2 100644 --- a/mars/services/scheduling/api/web.py +++ b/mars/services/scheduling/api/web.py @@ -51,10 +51,7 @@ async def get_subtask_schedule_summaries(self, session_id: str): "task_id": summary.task_id, "subtask_id": summary.subtask_id, "bands": [ - { - "endpoint": band[0], - "band_name": band[1], - } + {"endpoint": band[0], "band_name": band[1],} for band in summary.bands ], "num_reschedules": summary.num_reschedules, diff --git a/mars/services/scheduling/supervisor/autoscale.py b/mars/services/scheduling/supervisor/autoscale.py index 8ea9e46d80..3bd8c81f55 100644 --- a/mars/services/scheduling/supervisor/autoscale.py +++ b/mars/services/scheduling/supervisor/autoscale.py @@ -346,7 +346,7 @@ async def _scale_out(self, queueing_refs): while any( [await queueing_ref.all_bands_busy() for queueing_ref in queueing_refs] ): - worker_num = 2**rnd + worker_num = 2 ** rnd if ( self._autoscaler.get_dynamic_worker_nums() + worker_num > self._max_workers diff --git a/mars/services/scheduling/supervisor/tests/test_speculation.py b/mars/services/scheduling/supervisor/tests/test_speculation.py index d9238ada0c..2d66131d24 100644 --- 
a/mars/services/scheduling/supervisor/tests/test_speculation.py +++ b/mars/services/scheduling/supervisor/tests/test_speculation.py @@ -67,8 +67,7 @@ async def actor_pool(): address=pool.external_address, ) queue_ref = await mo.create_actor( - MockSubtaskQueueingActor, - address=pool.external_address, + MockSubtaskQueueingActor, address=pool.external_address, ) try: yield pool, cluster_api, session_id, slots_ref, queue_ref diff --git a/mars/services/scheduling/worker/tests/test_execution.py b/mars/services/scheduling/worker/tests/test_execution.py index 1bda5b3f80..057a4f6705 100644 --- a/mars/services/scheduling/worker/tests/test_execution.py +++ b/mars/services/scheduling/worker/tests/test_execution.py @@ -436,14 +436,10 @@ def test_estimate_size(): index_value = parse_index(pd.Index([10, 20, 30], dtype=np.int64)) - input1 = DataFrameFetch( - output_types=[OutputType.series], - ).new_chunk( + input1 = DataFrameFetch(output_types=[OutputType.series],).new_chunk( [], _key="INPUT1", shape=(np.nan,), dtype=np.dtype("O"), index_value=index_value ) - input2 = DataFrameFetch( - output_types=[OutputType.series], - ).new_chunk( + input2 = DataFrameFetch(output_types=[OutputType.series],).new_chunk( [], _key="INPUT2", shape=(np.nan,), dtype=np.dtype("O"), index_value=index_value ) result_chunk = DataFrameAdd( @@ -509,8 +505,7 @@ def check_fun(): await asyncio.sleep(0.5) await asyncio.wait_for( - execution_ref.cancel_subtask(subtask.subtask_id, kill_timeout=1), - timeout=30, + execution_ref.cancel_subtask(subtask.subtask_id, kill_timeout=1), timeout=30, ) r = await asyncio.wait_for(aiotask, timeout=30) assert r.status == SubtaskStatus.cancelled diff --git a/mars/services/session/supervisor/core.py b/mars/services/session/supervisor/core.py index 21da90b2f3..384cf6dd92 100644 --- a/mars/services/session/supervisor/core.py +++ b/mars/services/session/supervisor/core.py @@ -169,9 +169,7 @@ async def __post_create__(self): uid=CustomLogMetaActor.gen_uid(self._session_id), ) logger.debug( - "Session %s actor created on pid: %s", - self._session_id, - os.getpid(), + "Session %s actor created on pid: %s", self._session_id, os.getpid(), ) async def remove(self): diff --git a/mars/services/session/tests/test_service.py b/mars/services/session/tests/test_service.py index 0192613a32..4f31bdfc4c 100644 --- a/mars/services/session/tests/test_service.py +++ b/mars/services/session/tests/test_service.py @@ -36,10 +36,7 @@ async def test_session_service(test_web): async with pool: config = { "services": ["cluster", "session", "meta"], - "cluster": { - "backend": "fixed", - "lookup_address": pool.external_address, - }, + "cluster": {"backend": "fixed", "lookup_address": pool.external_address,}, "meta": {"store": "dict"}, } if test_web: @@ -181,10 +178,7 @@ async def test_dmap(): "task", "mutable", ], - "cluster": { - "backend": "fixed", - "lookup_address": pool.external_address, - }, + "cluster": {"backend": "fixed", "lookup_address": pool.external_address,}, "meta": {"store": "dict"}, } await start_services(NodeRole.SUPERVISOR, config, address=pool.external_address) diff --git a/mars/services/storage/api/web.py b/mars/services/storage/api/web.py index 1a22a07314..ff5efcac09 100644 --- a/mars/services/storage/api/web.py +++ b/mars/services/storage/api/web.py @@ -136,9 +136,7 @@ async def get_batch(self, args_list, kwargs_list): path = f"{self._address}/api/session/{self._session_id}/storage/batch/get" res = await self._request_url( - path=path, - method="POST", - data=serialize_serializable(get_chunks), + 
path=path, method="POST", data=serialize_serializable(get_chunks), ) return deserialize_serializable(res.body) diff --git a/mars/services/storage/tests/test_service.py b/mars/services/storage/tests/test_service.py index fb85080d9e..5ed285bdbe 100644 --- a/mars/services/storage/tests/test_service.py +++ b/mars/services/storage/tests/test_service.py @@ -61,9 +61,7 @@ async def test_storage_service(actor_pools): config = { "services": ["storage"], - "storage": { - "backends": ["shared_memory"], - }, + "storage": {"backends": ["shared_memory"],}, } await start_services(NodeRole.WORKER, config, address=worker_pool.external_address) diff --git a/mars/services/storage/transfer.py b/mars/services/storage/transfer.py index 18f88b4cca..5794d3000d 100644 --- a/mars/services/storage/transfer.py +++ b/mars/services/storage/transfer.py @@ -24,7 +24,7 @@ from .core import DataManagerActor, WrappedStorageFileObject from .handler import StorageHandlerActor -DEFAULT_TRANSFER_BLOCK_SIZE = 4 * 1024**2 +DEFAULT_TRANSFER_BLOCK_SIZE = 4 * 1024 ** 2 logger = logging.getLogger(__name__) diff --git a/mars/services/subtask/worker/runner.py b/mars/services/subtask/worker/runner.py index dbe7ac7236..5804c38820 100644 --- a/mars/services/subtask/worker/runner.py +++ b/mars/services/subtask/worker/runner.py @@ -116,8 +116,7 @@ async def run_subtask(self, subtask: Subtask): # when recovering actor pools, the actor created in sub pools # may be recovered already self._session_id_to_processors[session_id] = await mo.actor_ref( - uid=SubtaskProcessorActor.gen_uid(session_id), - address=self.address, + uid=SubtaskProcessorActor.gen_uid(session_id), address=self.address, ) processor = self._session_id_to_processors[session_id] try: diff --git a/mars/services/task/api/web.py b/mars/services/task/api/web.py index 21dffd8e0c..329c655e0c 100644 --- a/mars/services/task/api/web.py +++ b/mars/services/task/api/web.py @@ -83,9 +83,7 @@ async def submit_tileable_graph(self, session_id: str): oscar_api = await self._get_oscar_task_api(session_id) task_id = await oscar_api.submit_tileable_graph( - graph, - fuse_enabled=fuse_enabled, - extra_config=extra_config, + graph, fuse_enabled=fuse_enabled, extra_config=extra_config, ) self.write(task_id) @@ -218,11 +216,7 @@ async def submit_tileable_graph( serialize_serializable(extra_config) if extra_config else None ) body = serialize_serializable( - { - "fuse": fuse_enabled, - "graph": graph, - "extra_config": extra_config_ser, - } + {"fuse": fuse_enabled, "graph": graph, "extra_config": extra_config_ser,} ) res = await self._request_url( path=path, diff --git a/mars/services/task/execution/api.py b/mars/services/task/execution/api.py index 312c22a371..8885f735f3 100644 --- a/mars/services/task/execution/api.py +++ b/mars/services/task/execution/api.py @@ -57,8 +57,7 @@ def merge_from(self, execution_config: "ExecutionConfig") -> "ExecutionConfig": assert isinstance(execution_config, ExecutionConfig) assert self.backend == execution_config.backend merge_dict( - self._config, - execution_config.get_config_dict(), + self._config, execution_config.get_config_dict(), ) return self diff --git a/mars/services/task/execution/mars/executor.py b/mars/services/task/execution/mars/executor.py index ad4354a314..9c70a4977c 100644 --- a/mars/services/task/execution/mars/executor.py +++ b/mars/services/task/execution/mars/executor.py @@ -27,9 +27,7 @@ ShuffleProxy, ) from .....lib.aio import alru_cache -from .....oscar.profiling import ( - ProfilingData, -) +from .....oscar.profiling import ProfilingData 
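The merge_from hunk above folds one backend configuration into another via merge_dict. That helper is not shown in this patch; a hedged sketch of the recursive-merge behaviour such a call implies (nested dicts merge key by key, scalars overwrite), with the name merge_dict_sketch marking it as a hypothetical stand-in:

    def merge_dict_sketch(dst, src):
        # hypothetical stand-in for the merge_dict helper used in merge_from
        for key, value in src.items():
            if isinstance(value, dict) and isinstance(dst.get(key), dict):
                merge_dict_sketch(dst[key], value)
            else:
                dst[key] = value
        return dst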
from .....resource import Resource from .....typing import TileableType, BandType from .....utils import Timer diff --git a/mars/services/task/execution/ray/executor.py b/mars/services/task/execution/ray/executor.py index 432180768e..f946f5dc0c 100644 --- a/mars/services/task/execution/ray/executor.py +++ b/mars/services/task/execution/ray/executor.py @@ -75,12 +75,10 @@ ("session_id", "task_id", "stage_id"), ) started_subtask_number = Metrics.counter( - "mars.ray_dag.started_subtask_number", - "The number of started subtask.", + "mars.ray_dag.started_subtask_number", "The number of started subtask.", ) completed_subtask_number = Metrics.counter( - "mars.ray_dag.completed_subtask_number", - "The number of completed subtask.", + "mars.ray_dag.completed_subtask_number", "The number of completed subtask.", ) @@ -125,9 +123,7 @@ class _SubtaskGC: """GC the inputs of subtask chunk.""" def __init__( - self, - subtask_chunk_graph: ChunkGraph, - context: RayExecutionWorkerContext, + self, subtask_chunk_graph: ChunkGraph, context: RayExecutionWorkerContext, ): self._subtask_chunk_graph = subtask_chunk_graph self._context = context @@ -457,9 +453,7 @@ def __init__( meta_api: MetaAPI, ): logger.info( - "Start task %s with GC method %s.", - task.task_id, - config.get_gc_method(), + "Start task %s with GC method %s.", task.task_id, config.get_gc_method(), ) self._config = config self._task = task diff --git a/mars/services/task/execution/ray/tests/test_ray_execution_backend.py b/mars/services/task/execution/ray/tests/test_ray_execution_backend.py index 4ae3a276f5..6d2ae45ba4 100644 --- a/mars/services/task/execution/ray/tests/test_ray_execution_backend.py +++ b/mars/services/task/execution/ray/tests/test_ray_execution_backend.py @@ -207,10 +207,7 @@ def test_ray_execute_subtask_basic(_): @pytest.mark.asyncio async def test_ray_fetcher(ray_start_regular_shared2): pd_value = pd.DataFrame( - { - "col1": [str(i) for i in range(10)], - "col2": np.random.randint(0, 100, (10,)), - } + {"col1": [str(i) for i in range(10)], "col2": np.random.randint(0, 100, (10,)),} ) pd_object_ref = ray.put(pd_value) np_value = np.asarray([1, 3, 6, 2, 4]) diff --git a/mars/services/task/supervisor/preprocessor.py b/mars/services/task/supervisor/preprocessor.py index 35b5c0026e..2645c478ca 100644 --- a/mars/services/task/supervisor/preprocessor.py +++ b/mars/services/task/supervisor/preprocessor.py @@ -60,9 +60,7 @@ def _gen_tileable_handlers(self, next_tileable_handlers: List[_TileableHandler]) break def _gen_result_chunks( - self, - chunk_graph: ChunkGraph, - next_tileable_handlers: List[_TileableHandler], + self, chunk_graph: ChunkGraph, next_tileable_handlers: List[_TileableHandler], ): if not self.cancelled: return super()._gen_result_chunks(chunk_graph, next_tileable_handlers) @@ -119,10 +117,7 @@ class TaskPreprocessor: map_reduce_id_to_infos: Dict[int, MapReduceInfo] def __init__( - self, - task: Task, - tiled_context: TileContext = None, - config: Config = None, + self, task: Task, tiled_context: TileContext = None, config: Config = None, ): self._task = task self.tileable_graph = task.tileable_graph diff --git a/mars/services/task/supervisor/processor.py b/mars/services/task/supervisor/processor.py index 39de202f2a..08a855eac3 100644 --- a/mars/services/task/supervisor/processor.py +++ b/mars/services/task/supervisor/processor.py @@ -46,10 +46,7 @@ class TaskProcessor: _stage_tileables: Set[TileableType] def __init__( - self, - task: Task, - preprocessor: TaskPreprocessor, - executor: TaskExecutor, + self, task: Task, 
preprocessor: TaskPreprocessor, executor: TaskExecutor, ): self._task = task self._preprocessor = preprocessor @@ -184,10 +181,7 @@ async def _iter_stage_chunk_graph(self): stage_profiler.set("total", stage_timer.duration) async def _process_stage_chunk_graph( - self, - stage_id: str, - stage_profiler, - chunk_graph: ChunkGraph, + self, stage_id: str, stage_profiler, chunk_graph: ChunkGraph, ): available_bands = await self._executor.get_available_band_resources() meta_api = self._executor._meta_api @@ -462,8 +456,7 @@ def _finish(self): serialization = ProfilingData[self._task.task_id, "serialization"] if not serialization.empty(): serialization.set( - "total", - sum(serialization.values()), + "total", sum(serialization.values()), ) data = ProfilingData.pop(self._task.task_id) self.result.profiling = { diff --git a/mars/services/task/supervisor/task.py b/mars/services/task/supervisor/task.py index fd8766d929..74918a0d43 100644 --- a/mars/services/task/supervisor/task.py +++ b/mars/services/task/supervisor/task.py @@ -314,11 +314,7 @@ async def add_task( address=self.address, tile_context=task_preprocessor.tile_context, ) - processor = self._task_processor_cls( - task, - task_preprocessor, - task_executor, - ) + processor = self._task_processor_cls(task, task_preprocessor, task_executor,) self._task_id_to_processor[task.task_id] = processor # tell self to start running diff --git a/mars/services/task/supervisor/tests/test_task_manager.py b/mars/services/task/supervisor/tests/test_task_manager.py index cd9f213a82..665bc3c717 100644 --- a/mars/services/task/supervisor/tests/test_task_manager.py +++ b/mars/services/task/supervisor/tests/test_task_manager.py @@ -79,10 +79,7 @@ async def actor_pool(): # create configuration config = ExecutionConfig.from_params( - backend=backend, - n_worker=1, - n_cpu=2, - subtask_max_retries=3, + backend=backend, n_worker=1, n_cpu=2, subtask_max_retries=3, ) await mo.create_actor( TaskConfigurationActor, @@ -677,9 +674,7 @@ async def test_dump_subtask_graph(actor_pool): next(TileableGraphBuilder(graph).build()) task_id = await manager.submit_tileable_graph( - graph, - fuse_enabled=True, - extra_config={"dump_subtask_graph": True}, + graph, fuse_enabled=True, extra_config={"dump_subtask_graph": True}, ) assert isinstance(task_id, str) diff --git a/mars/services/tests/fault_injection_patch.py b/mars/services/tests/fault_injection_patch.py index 9e4cbf1fcf..312b64841f 100644 --- a/mars/services/tests/fault_injection_patch.py +++ b/mars/services/tests/fault_injection_patch.py @@ -78,10 +78,8 @@ async def run(self): ExtraConfigKey.FAULT_INJECTION_MANAGER_NAME ) if fault_injection_manager_name is not None: - self._fault_injection_manager_ref = ( - await self._session_api.get_remote_object( - self._session_id, fault_injection_manager_name - ) + self._fault_injection_manager_ref = await self._session_api.get_remote_object( + self._session_id, fault_injection_manager_name ) return await super().run() diff --git a/mars/storage/shared_memory.py b/mars/storage/shared_memory.py index 0fd71c3b55..d1af1444ee 100644 --- a/mars/storage/shared_memory.py +++ b/mars/storage/shared_memory.py @@ -36,6 +36,7 @@ def __del__(self): if os.name != "nt" and fd >= 0: os.close(fd) + except ImportError: # pragma: no cover # allow shared_memory package to be absent SharedMemory = SharedMemoryForRead = None diff --git a/mars/tensor/arithmetic/tests/test_arithmetic_execution.py b/mars/tensor/arithmetic/tests/test_arithmetic_execution.py index 3d2711e7e9..a624235fff 100644 --- 
a/mars/tensor/arithmetic/tests/test_arithmetic_execution.py +++ b/mars/tensor/arithmetic/tests/test_arithmetic_execution.py @@ -368,7 +368,7 @@ def test_frexp_execution(setup): frexp(arr1, o1, o2) res1, res2 = fetch(*execute(o1, o2)) - res = res1 * 2**res2 + res = res1 * 2 ** res2 np.testing.assert_array_almost_equal(res, data1, decimal=3) data1 = sps.random(5, 9, density=0.1) diff --git a/mars/tensor/base/repeat.py b/mars/tensor/base/repeat.py index 2d2e4448ac..dd8e53b610 100644 --- a/mars/tensor/base/repeat.py +++ b/mars/tensor/base/repeat.py @@ -133,7 +133,9 @@ def tile(cls, op): rp = repeats[start:stop] size = int(rp.sum()) elif not isinstance(repeats, Integral): - rp = repeats.cix[ax_idx,] + rp = repeats.cix[ + ax_idx, + ] size = np.nan else: rp = repeats diff --git a/mars/tensor/datasource/diag.py b/mars/tensor/datasource/diag.py index b5ca00a920..2030121d82 100644 --- a/mars/tensor/datasource/diag.py +++ b/mars/tensor/datasource/diag.py @@ -140,7 +140,9 @@ def _get_nsplits(cls, op): def _get_chunk(cls, op, chunk_k, chunk_shape, chunk_idx): assert chunk_shape[0] == chunk_shape[1] input_idx = chunk_idx[1] if op.k < 0 else chunk_idx[0] - input_chunk = op.inputs[0].cix[input_idx,] + input_chunk = op.inputs[0].cix[ + input_idx, + ] op = TensorDiag(k=chunk_k, dtype=op.dtype, gpu=op.gpu, sparse=op.sparse) return op.new_chunk([input_chunk], shape=chunk_shape, index=chunk_idx) diff --git a/mars/tensor/linalg/cholesky.py b/mars/tensor/linalg/cholesky.py index c62520d5b0..621bd686ff 100644 --- a/mars/tensor/linalg/cholesky.py +++ b/mars/tensor/linalg/cholesky.py @@ -72,19 +72,13 @@ def tile(cls, op): lower_chunk = TensorZeros( dtype=tensor.dtype, shape=lower_shape, order=tensor.order.value ).new_chunk( - None, - shape=lower_shape, - index=(i, j), - order=tensor.order, + None, shape=lower_shape, index=(i, j), order=tensor.order, ) upper_shape = (in_tensor.nsplits[1][j], in_tensor.nsplits[0][i]) upper_chunk = TensorZeros( dtype=tensor.dtype, shape=upper_shape, order=tensor.order.value ).new_chunk( - None, - shape=upper_shape, - index=(j, i), - order=tensor.order, + None, shape=upper_shape, index=(j, i), order=tensor.order, ) lower_chunks[lower_chunk.index] = lower_chunk upper_chunks[upper_chunk.index] = upper_chunk diff --git a/mars/tensor/rechunk/core.py b/mars/tensor/rechunk/core.py index b55d3a4730..4e3ba755bf 100644 --- a/mars/tensor/rechunk/core.py +++ b/mars/tensor/rechunk/core.py @@ -98,8 +98,7 @@ def gen_rechunk_infos( input_chunk_shape=list(len(s) for s in inp_chunk_indexes), ) for inp_chunk_index, inp_chunk_slice in zip( - itertools.product(*inp_chunk_indexes), - itertools.product(*inp_chunk_slices), + itertools.product(*inp_chunk_indexes), itertools.product(*inp_chunk_slices), ): inp_chunk = inp.cix[tuple(inp_chunk_index)] inp_chunks.append(inp_chunk) diff --git a/mars/tensor/reduction/var.py b/mars/tensor/reduction/var.py index 8bfe2fa484..137be24a23 100644 --- a/mars/tensor/reduction/var.py +++ b/mars/tensor/reduction/var.py @@ -30,12 +30,12 @@ def reduce_var_square(var_square, avg_diff, count, op, axis, sum_func): kw = dict(axis=axis, dtype=dtype, keepdims=bool(op.keepdims)) reduced_var_square = var_square[..., moment - 2].sum(**kw) + sum_func( - count * avg_diff**moment, **kw + count * avg_diff ** moment, **kw ) for i in range(1, moment - 1): coeff = factorial(moment) / float(factorial(i) * factorial(moment - i)) reduced_var_square += coeff * sum_func( - var_square[..., moment - i - 2] * avg_diff**moment, **kw + var_square[..., moment - i - 2] * avg_diff ** moment, **kw ) return 
reduced_var_square diff --git a/mars/tensor/spatial/distance/squareform.py b/mars/tensor/spatial/distance/squareform.py index ee6c3ff6b2..91846eb823 100644 --- a/mars/tensor/spatial/distance/squareform.py +++ b/mars/tensor/spatial/distance/squareform.py @@ -228,9 +228,7 @@ def _tile_chunks(cls, op, chunk_size): out_indices = list(itertools.product(*(range(len(cs)) for cs in chunk_size))) for out_idx, out_shape in zip(out_indices, out_shape_iter): reduce_chunk_op = TensorSquareform( - stage=OperandStage.reduce, - dtype=out.dtype, - n_reducers=len(out_indices), + stage=OperandStage.reduce, dtype=out.dtype, n_reducers=len(out_indices), ) reduce_chunk = reduce_chunk_op.new_chunk( [proxy_chunk], shape=out_shape, index=out_idx, order=out.order diff --git a/mars/tensor/special/ellip_func_integrals.py b/mars/tensor/special/ellip_func_integrals.py index 8acead561f..cf22246d5b 100644 --- a/mars/tensor/special/ellip_func_integrals.py +++ b/mars/tensor/special/ellip_func_integrals.py @@ -151,6 +151,7 @@ def elliprj(x, y, z, p, **kwargs): op = TensorElliprj(**kwargs) return op(x, y, z, p) + except AttributeError: # These functions are not implemented before scipy v1.8 so # spsecial.func may cause AttributeError diff --git a/mars/tensor/special/tests/test_special.py b/mars/tensor/special/tests/test_special.py index d577653458..e58bf7d95d 100644 --- a/mars/tensor/special/tests/test_special.py +++ b/mars/tensor/special/tests/test_special.py @@ -99,11 +99,7 @@ def test_unary_operand_no_out(func, tensor_cls): @pytest.mark.parametrize( "func,tensor_cls", - [ - ("erfc", TensorErfc), - ("erfcx", TensorErfcx), - ("erfi", TensorErfi), - ], + [("erfc", TensorErfc), ("erfcx", TensorErfcx), ("erfi", TensorErfi),], ) def test_unary_operand_out(func, tensor_cls): sp_func = getattr(spsecial, func) diff --git a/mars/tensor/special/tests/test_special_execution.py b/mars/tensor/special/tests/test_special_execution.py index a0964cae25..ad71ce4573 100644 --- a/mars/tensor/special/tests/test_special_execution.py +++ b/mars/tensor/special/tests/test_special_execution.py @@ -306,8 +306,7 @@ def test_quintuple_execution(setup, func): @pytest.mark.parametrize( - "func", - ["fresnel", "modfresnelp", "modfresnelm", "airy", "airye", "itairy"], + "func", ["fresnel", "modfresnelp", "modfresnelm", "airy", "airye", "itairy"], ) def test_unary_tuple_execution(setup, func): sp_func = getattr(spspecial, func) diff --git a/mars/tensor/statistics/bincount.py b/mars/tensor/statistics/bincount.py index 34590a320c..566e39b3ab 100644 --- a/mars/tensor/statistics/bincount.py +++ b/mars/tensor/statistics/bincount.py @@ -154,10 +154,7 @@ def tile(cls, op: "TensorBinCount"): params = out.params.copy() params["shape"] = (tileable_right_bound,) return new_op.new_tileables( - op.inputs, - chunks=reduce_chunks, - nsplits=(tuple(reduce_nsplits),), - **params, + op.inputs, chunks=reduce_chunks, nsplits=(tuple(reduce_nsplits),), **params, ) @classmethod diff --git a/mars/tensor/statistics/histogram.py b/mars/tensor/statistics/histogram.py index baaf847396..2b47c695dd 100644 --- a/mars/tensor/statistics/histogram.py +++ b/mars/tensor/statistics/histogram.py @@ -119,7 +119,7 @@ class HistBinScottSelector(HistBinSelector): """ def __call__(self): - return (24.0 * np.pi**0.5 / self._x.size) ** (1.0 / 3.0) * mt.std(self._x) + return (24.0 * np.pi ** 0.5 / self._x.size) ** (1.0 / 3.0) * mt.std(self._x) class HistBinStoneSelector(HistBinSelector): @@ -417,12 +417,7 @@ class TensorHistogramBinEdges(TensorOperand, TensorOperandMixin): _uniform_bins = 
TupleField("uniform_bins") def __init__( - self, - input=None, - bins=None, - range=None, - weights=None, - **kw, + self, input=None, bins=None, range=None, weights=None, **kw, ): super().__init__(_input=input, _bins=bins, _range=range, _weights=weights, **kw) diff --git a/mars/tensor/stats/ks.py b/mars/tensor/stats/ks.py index 404d6d7bb8..b6a7301c61 100644 --- a/mars/tensor/stats/ks.py +++ b/mars/tensor/stats/ks.py @@ -350,7 +350,7 @@ def _calc_prob_2samp(d, n1, n2, alternative, mode): # pragma: no cover z = np.sqrt(en) * d # Use Hodges' suggested approximation Eqn 5.3 # Requires m to be the larger of (n1, n2) - expt = -2 * z**2 - 2 * z * (m + 2 * n) / np.sqrt(m * n * (m + n)) / 3.0 + expt = -2 * z ** 2 - 2 * z * (m + 2 * n) / np.sqrt(m * n * (m + n)) / 3.0 prob = np.exp(expt) return np.clip(prob, 0, 1) diff --git a/mars/tensor/stats/tests/test_stats_execution.py b/mars/tensor/stats/tests/test_stats_execution.py index b820383d93..23648fe8b1 100644 --- a/mars/tensor/stats/tests/test_stats_execution.py +++ b/mars/tensor/stats/tests/test_stats_execution.py @@ -141,29 +141,25 @@ def test_t_test_execution(setup): if parse_version(scipy.__version__) >= parse_version("1.6.0"): alternatives = ["less", "greater", "two-sided"] - mt_from_stats = ( - lambda a, b, alternative=None, equal_var=True: ttest_ind_from_stats( - a.mean(), - a.std(), - a.shape[0], - b.mean(), - b.std(), - b.shape[0], - alternative=alternative, - equal_var=equal_var, - ) + mt_from_stats = lambda a, b, alternative=None, equal_var=True: ttest_ind_from_stats( + a.mean(), + a.std(), + a.shape[0], + b.mean(), + b.std(), + b.shape[0], + alternative=alternative, + equal_var=equal_var, ) - sp_from_stats = ( - lambda a, b, alternative=None, equal_var=True: sp_ttest_ind_from_stats( - a.mean(), - a.std(), - a.shape[0], - b.mean(), - b.std(), - b.shape[0], - alternative=alternative, - equal_var=equal_var, - ) + sp_from_stats = lambda a, b, alternative=None, equal_var=True: sp_ttest_ind_from_stats( + a.mean(), + a.std(), + a.shape[0], + b.mean(), + b.std(), + b.shape[0], + alternative=alternative, + equal_var=equal_var, ) else: alternatives = ["two-sided"] diff --git a/mars/tensor/stats/ttest.py b/mars/tensor/stats/ttest.py index ff99c13644..5a02f329ef 100644 --- a/mars/tensor/stats/ttest.py +++ b/mars/tensor/stats/ttest.py @@ -51,7 +51,7 @@ def _unequal_var_ttest_denom(v1, n1, v2, n2): vn1 = v1 / n1 vn2 = v2 / n2 with np.errstate(divide="ignore", invalid="ignore"): - df = (vn1 + vn2) ** 2 / (vn1**2 / (n1 - 1) + vn2**2 / (n2 - 1)) + df = (vn1 + vn2) ** 2 / (vn1 ** 2 / (n1 - 1) + vn2 ** 2 / (n2 - 1)) # If df is undefined, variances are zero (assumes n1 > 0 & n2 > 0). # Hence it doesn't matter what df is as long as it's not NaN. 
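The _unequal_var_ttest_denom hunk above changes only operator spacing, but the line it reformats is the Welch-Satterthwaite approximation for the degrees of freedom of the unequal-variance t-test. Written out on its own, with the same symbols as the source (v = sample variance, n = sample size):

    import numpy as np

    def welch_df(v1, n1, v2, n2):
        # df = (v1/n1 + v2/n2)**2 / ((v1/n1)**2/(n1-1) + (v2/n2)**2/(n2-1))
        vn1, vn2 = v1 / n1, v2 / n2
        with np.errstate(divide="ignore", invalid="ignore"):
            return (vn1 + vn2) ** 2 / (vn1 ** 2 / (n1 - 1) + vn2 ** 2 / (n2 - 1))

As the source comment notes, a NaN df here means both variances were zero, and any non-NaN value would do.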
@@ -138,9 +138,9 @@ def ttest_ind_from_stats( mean1, std1, nobs1, mean2, std2, nobs2, equal_var=True, alternative="two-sided" ): if equal_var: - df, denom = _equal_var_ttest_denom(std1**2, nobs1, std2**2, nobs2) + df, denom = _equal_var_ttest_denom(std1 ** 2, nobs1, std2 ** 2, nobs2) else: - df, denom = _unequal_var_ttest_denom(std1**2, nobs1, std2**2, nobs2) + df, denom = _unequal_var_ttest_denom(std1 ** 2, nobs1, std2 ** 2, nobs2) res = _ttest_ind_from_stats(mean1, mean2, denom, df, alternative) return ExecutableTuple(Ttest_indResult(*res)) diff --git a/mars/tensor/ufunc/ufunc.py b/mars/tensor/ufunc/ufunc.py index d3747d6e1d..92ad01ee23 100644 --- a/mars/tensor/ufunc/ufunc.py +++ b/mars/tensor/ufunc/ufunc.py @@ -67,15 +67,11 @@ def reduce(self, array, axis=0, dtype=None, out=None, keepdims=False): UFUNC_TO_TENSOR_FUNCS = { np.add: TensorUfuncDef( - arith.add, - accumulator=reduction.cumsum, - aggregator=reduction.sum, + arith.add, accumulator=reduction.cumsum, aggregator=reduction.sum, ), np.subtract: TensorUfuncDef(arith.subtract), np.multiply: TensorUfuncDef( - arith.multiply, - accumulator=reduction.cumprod, - aggregator=reduction.prod, + arith.multiply, accumulator=reduction.cumprod, aggregator=reduction.prod, ), np.divide: TensorUfuncDef(arith.divide), np.logaddexp: TensorUfuncDef( diff --git a/mars/tests/test_resource.py b/mars/tests/test_resource.py index eaf94cbacc..90c0875ef7 100644 --- a/mars/tests/test_resource.py +++ b/mars/tests/test_resource.py @@ -202,23 +202,23 @@ def write_tmp_text_file(prefix, content): def test_resource(): assert Resource(num_cpus=1) + Resource(num_cpus=1) == Resource(num_cpus=2) assert Resource(num_cpus=1) + Resource(num_gpus=1) + Resource( - mem_bytes=1024**3 - ) == Resource(num_cpus=1, num_gpus=1, mem_bytes=1024**3) - assert -Resource(num_cpus=1, num_gpus=1, mem_bytes=1024**3) == Resource( - num_cpus=-1, num_gpus=-1, mem_bytes=-(1024**3) + mem_bytes=1024 ** 3 + ) == Resource(num_cpus=1, num_gpus=1, mem_bytes=1024 ** 3) + assert -Resource(num_cpus=1, num_gpus=1, mem_bytes=1024 ** 3) == Resource( + num_cpus=-1, num_gpus=-1, mem_bytes=-(1024 ** 3) ) assert Resource(num_cpus=-1) < ZeroResource assert Resource(num_gpus=-1) < ZeroResource assert Resource(mem_bytes=-1) < ZeroResource - assert Resource(num_cpus=1, num_gpus=1, mem_bytes=-(1024**3)) < ZeroResource - assert Resource(num_cpus=1, num_gpus=1, mem_bytes=1024**3) > Resource( + assert Resource(num_cpus=1, num_gpus=1, mem_bytes=-(1024 ** 3)) < ZeroResource + assert Resource(num_cpus=1, num_gpus=1, mem_bytes=1024 ** 3) > Resource( num_cpus=10, num_gpus=1, mem_bytes=1024 ) - assert Resource(num_cpus=1, num_gpus=10, mem_bytes=1024**3) > Resource( - num_cpus=10, num_gpus=1, mem_bytes=1024**3 + assert Resource(num_cpus=1, num_gpus=10, mem_bytes=1024 ** 3) > Resource( + num_cpus=10, num_gpus=1, mem_bytes=1024 ** 3 ) - assert Resource(num_cpus=100, num_gpus=10, mem_bytes=1024**3) > Resource( - num_cpus=10, num_gpus=10, mem_bytes=1024**3 + assert Resource(num_cpus=100, num_gpus=10, mem_bytes=1024 ** 3) > Resource( + num_cpus=10, num_gpus=10, mem_bytes=1024 ** 3 ) assert Resource(num_cpus=100, num_gpus=10, mem_bytes=1024) - Resource( num_cpus=10, num_gpus=20, mem_bytes=512 diff --git a/mars/utils.py b/mars/utils.py index 6f2b807d94..1c3ff47e44 100644 --- a/mars/utils.py +++ b/mars/utils.py @@ -233,17 +233,17 @@ def readable_size(size: int, trunc: bool = False) -> str: if size < 1024: ret_size = size size_unit = "" - elif 1024 <= size < 1024**2: + elif 1024 <= size < 1024 ** 2: ret_size = size * 1.0 / 1024 
size_unit = "K" - elif 1024**2 <= size < 1024**3: - ret_size = size * 1.0 / (1024**2) + elif 1024 ** 2 <= size < 1024 ** 3: + ret_size = size * 1.0 / (1024 ** 2) size_unit = "M" - elif 1024**3 <= size < 1024**4: - ret_size = size * 1.0 / (1024**3) + elif 1024 ** 3 <= size < 1024 ** 4: + ret_size = size * 1.0 / (1024 ** 3) size_unit = "G" else: - ret_size = size * 1.0 / (1024**4) + ret_size = size * 1.0 / (1024 ** 4) size_unit = "T" if not trunc:
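The readable_size hunk that closes this excerpt walks a fixed ladder of 1024 ** n thresholds to pick a K/M/G/T suffix. The same selection can be written as a loop, which makes the progression explicit; a sketch only (the real function also handles the trunc flag, which falls outside this excerpt):

    def readable_size_sketch(size):
        value = float(size)
        for unit in ("", "K", "M", "G"):
            if value < 1024:
                return f"{value:.2f}{unit}"
            value /= 1024.0
        return f"{value:.2f}T"

For example, readable_size_sketch(2048) yields "2.00K" and readable_size_sketch(1024 ** 4) yields "1.00T".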