Skip to content

Commit 8add623

Browse files
cjmcdonald00 authored and vapier committed
Add parallelism to 'branches' command
Spread the operation of querying which local branches exist across a pool of processes, and build the name map of projects -> branches as these tasks finish rather than blocking on the entire query.

The search operations are submitted in batches to reduce the overhead of interprocess communication. The `chunksize` argument used to control this batch size was selected by incrementing through powers of two until it stopped being faster.

Change-Id: Ie3d7f799ee8e83e5058536caf53e2979175408b7
Reviewed-on: https://gerrit-review.googlesource.com/c/git-repo/+/291342
Tested-by: Chris Mcdonald <[email protected]>
Reviewed-by: Mike Frysinger <[email protected]>
1 parent 9747747 commit 8add623

File tree

2 files changed

+43
-4
lines changed

2 files changed

+43
-4
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ __pycache__
77
.repopickle_*
88
/repoc
99
/.tox
10+
/.venv
1011

1112
# PyCharm related
1213
/.idea/

subcmds/branches.py

+42-4
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,20 @@
1515
# limitations under the License.
1616

1717
from __future__ import print_function
18+
import itertools
19+
import multiprocessing
1820
import sys
1921
from color import Coloring
2022
from command import Command
2123

24+
# Number of projects to submit to a single worker process at a time.
25+
# This number represents a tradeoff between the overhead of IPC and finer
26+
# grained opportunity for parallelism. This particular value was chosen by
27+
# iterating through powers of two until the overall performance no longer
28+
# improved. The performance of this batch size is not a function of the
29+
# number of cores on the system.
30+
WORKER_BATCH_SIZE = 32
31+
2232

2333
class BranchColoring(Coloring):
2434
def __init__(self, config):
@@ -97,20 +107,32 @@ class Branches(Command):
97107
98108
"""
99109

110+
def _Options(self, p):
111+
"""Add flags to CLI parser for this subcommand."""
112+
default_jobs = min(multiprocessing.cpu_count(), 8)
113+
p.add_option(
114+
'-j',
115+
'--jobs',
116+
type=int,
117+
default=default_jobs,
118+
help='Number of worker processes to spawn '
119+
'(default: %s)' % default_jobs)
120+
100121
def Execute(self, opt, args):
101122
projects = self.GetProjects(args)
102123
out = BranchColoring(self.manifest.manifestProject.config)
103124
all_branches = {}
104125
project_cnt = len(projects)
126+
with multiprocessing.Pool(processes=opt.jobs) as pool:
127+
project_branches = pool.imap_unordered(
128+
expand_project_to_branches, projects, chunksize=WORKER_BATCH_SIZE)
105129

106-
for project in projects:
107-
for name, b in project.GetBranches().items():
108-
b.project = project
130+
for name, b in itertools.chain.from_iterable(project_branches):
109131
if name not in all_branches:
110132
all_branches[name] = BranchInfo(name)
111133
all_branches[name].add(b)
112134

113-
names = list(sorted(all_branches))
135+
names = sorted(all_branches)
114136

115137
if not names:
116138
print(' (no branches)', file=sys.stderr)
@@ -180,3 +202,19 @@ def Execute(self, opt, args):
180202
else:
181203
out.write(' in all projects')
182204
out.nl()
205+
206+
207+
def expand_project_to_branches(project):
    """Flatten one project into (branch name, branch) pairs.

    Each branch object returned by project.GetBranches() is tagged with the
    project it came from (via its `project` attribute) before being returned.

    Args:
        project: project.Project

    Returns:
        List[Tuple[str, git_config.Branch]]
    """
    pairs = [(name, branch) for name, branch in project.GetBranches().items()]
    # Record the owning project on every branch so callers can trace it back.
    for _, branch in pairs:
        branch.project = project
    return pairs

0 commit comments

Comments
 (0)