pbalapra
diff --git a/Diff for: ‎deephyper/__version__.py
+1-1 b/Diff for: ‎deephyper/__version__.py
+1-1
diff --git a/Diff for: ‎deephyper/benchmark/nas/nasbench101/__init__.py
+6-2 b/Diff for: ‎deephyper/benchmark/nas/nasbench101/__init__.py
+6-2
diff --git a/Diff for: ‎deephyper/benchmark/nas/nasbench101/run.py renamed to ‎deephyper/benchmark/nas/nasbench101/run_full.py
-1 b/Diff for: ‎deephyper/benchmark/nas/nasbench101/run.py renamed to ‎deephyper/benchmark/nas/nasbench101/run_full.py
-1
diff --git a/Diff for: ‎deephyper/benchmark/nas/nasbench101/run_only108.py
+18 b/Diff for: ‎deephyper/benchmark/nas/nasbench101/run_only108.py
+18
diff --git a/Diff for: ‎deephyper/evaluator/_balsam.py
+25-12 b/Diff for: ‎deephyper/evaluator/_balsam.py
+25-12
diff --git a/Diff for: ‎deephyper/evaluator/_mpiWorkerPool.py
+38-30 b/Diff for: ‎deephyper/evaluator/_mpiWorkerPool.py
+38-30
diff --git a/Diff for: ‎deephyper/evaluator/_processPool.py
+16-17 b/Diff for: ‎deephyper/evaluator/_processPool.py
+16-17
@@ -1,4 +1,4 @@
-VERSION = (0, 1, 9)
+VERSION = (0, 1, 10)
 
 __version__ = ".".join(map(str, VERSION))
 
 
@@ -9,7 +9,11 @@
 
     sh download_only108.sh
 
-An example usage with the regularized evolution search is::
+Example usage with the regularized evolution search, if you downloaded the data with ``download_only108.sh``::
 
-    python -m deephyper.search.nas.regevo --problem deephyper.benchmark.nas.nasbench101.problem.Problem --evaluator threadPool --run deephyper.benchmark.nas.nasbench101.run.run --max-evals 10000
+    python -m deephyper.search.nas.regevo --problem deephyper.benchmark.nas.nasbench101.problem.Problem --evaluator threadPool --run deephyper.benchmark.nas.nasbench101.run_only108.run --max-evals 1000
+
+Or, if you used ``download_full.sh``::
+
+    python -m deephyper.search.nas.regevo --problem deephyper.benchmark.nas.nasbench101.problem.Problem --evaluator threadPool --run deephyper.benchmark.nas.nasbench101.run_full.run --max-evals 1000
 """
@@ -5,7 +5,6 @@
 HERE = os.path.dirname(os.path.abspath(__file__))
 
 # # Use nasbench_full.tfrecord for full dataset (run download command above).
-# data_file = os.path.join(HERE, "nasbench_only108.tfrecord")
 data_file = os.path.join(HERE, "nasbench_full.tfrecord")
 nasbench = api.NASBench(data_file)
 
 
@@ -0,0 +1,18 @@
+import os
+from deephyper.benchmark.nas.nasbench101.util import create_search_space, evaluate_ops
+from nasbench import api
+
+HERE = os.path.dirname(os.path.abspath(__file__))
+
+# Uses nasbench_only108.tfrecord (fetch it by running download_only108.sh).
+data_file = os.path.join(HERE, "nasbench_only108.tfrecord")
+nasbench = api.NASBench(data_file)
+
+def run(config):
+
+    ss = create_search_space()
+
+    ops = config["arch_seq"]
+    val_acc = evaluate_ops(ss, ops, nasbench)
+
+    return val_acc
@@ -30,28 +30,41 @@ class BalsamEvaluator(Evaluator):
             used as the key for caching evaluations. Multiple inputs that map to the same
             hashable key will only be evaluated once. If ``None``, then cache_key defaults
             to a lossless (identity) encoding of the input dict.
+        num_nodes_per_eval (int): number of nodes requested for each evaluation task. Defaults to 1.
     """
 
-    def __init__(self, run_function, cache_key=None, **kwargs):
+    def __init__(self, run_function, cache_key=None, num_nodes_master=1, num_nodes_per_eval=1, num_ranks_per_node=1, num_evals_per_node=1, num_threads_per_rank=64, **kwargs):
         super().__init__(run_function, cache_key)
         self.id_key_map = {}
+
+        # Attributes related to scaling policy
+        self.num_nodes_master = num_nodes_master
+        self.num_nodes_per_eval = num_nodes_per_eval
+        self.num_ranks_per_node = num_ranks_per_node
+        self.num_evals_per_node = num_evals_per_node
+        self.num_threads_per_rank = num_threads_per_rank
+
         # reserve 1 DeepHyper worker for searcher process
         if LAUNCHER_NODES == 1:
             # --job-mode=serial edge case where 2 ranks (Master, Worker) are placed on the node
-            self.num_workers = self.WORKERS_PER_NODE - 1
+            self.num_workers = self.num_evals_per_node - 1
             # 1 node case for --job-mode=mpi will result in search process occupying
             # entirety of the only node ---> no evaluator workers (also should have DEEPHYPER_WORKERS_PER_NODE=1)
         else:
             if JOB_MODE == "serial":
                 # MPI ensemble Master rank0 occupies entirety of first node
-                self.num_workers = (LAUNCHER_NODES - 1) * self.WORKERS_PER_NODE - 1
+                assert self.num_nodes_master == 1, f"num_nodes_master=={self.num_nodes_master} when it should be 1 because job-mode is 'serial'."
+                self.num_workers = (LAUNCHER_NODES - 1) * self.num_evals_per_node - self.num_nodes_master
             if JOB_MODE == "mpi":
                 # all nodes free, but restricted to 1 job=worker per node
-                assert self.WORKERS_PER_NODE == 1
-                self.num_workers = LAUNCHER_NODES * self.WORKERS_PER_NODE - 1
+                self.num_workers = LAUNCHER_NODES - self.num_nodes_master
+                self.num_workers //= self.num_nodes_per_eval
+        assert self.num_workers > 0, f"The number of workers is {self.num_workers} when it shoud be > 0."
+
         logger.info("Balsam Evaluator instantiated")
         logger.debug(f"LAUNCHER_NODES = {LAUNCHER_NODES}")
-        logger.debug(f"WORKERS_PER_NODE = {self.WORKERS_PER_NODE}")
+        logger.debug(f"WORKERS_PER_NODE = {self.num_evals_per_node}")
+        logger.debug(f"NUM_NODES_PER_EVAL = {self.num_nodes_per_eval}")
         logger.debug(f"Total number of workers: {self.num_workers}")
         logger.info(f"Backend runs will use Python: {self.PYTHON_EXE}")
         self._init_app()
@@ -109,13 +122,13 @@ def _eval_exec(self, x):
     def _create_balsam_task(self, x):
         args = f"'{self.encode(x)}'"
         envs = f"KERAS_BACKEND={self.KERAS_BACKEND}"
-        # envs = ":".join(f'KERAS_BACKEND={self.KERAS_BACKEND} OMP_NUM_THREADS=62 KMP_BLOCKTIME=0 KMP_AFFINITY=\"granularity=fine,compact,1,0\"'.split())
         resources = {
-            "num_nodes": 1,
-            "ranks_per_node": 1,
-            "threads_per_rank": 64,
-            "node_packing_count": self.WORKERS_PER_NODE,
+            "num_nodes": self.num_nodes_per_eval,
+            "ranks_per_node": self.num_ranks_per_node,
+            "threads_per_rank": self.num_threads_per_rank,
+            "node_packing_count": self.num_evals_per_node
         }
+
         for key in resources:
             if key in x:
                 resources[key] = x[key]
@@ -135,5 +148,5 @@ def _on_done(job):
 
     @staticmethod
     def _on_fail(job):
-        logger.info(f"Task {job.cute_id} failed; setting objective as float_max")
+        logger.info(f"Task {job.cute_id} failed; setting objective as float_min")
         return Evaluator.FAIL_RETURN_VALUE
@@ -6,10 +6,10 @@
 from deephyper.evaluator.evaluate import Evaluator
 
 logger = logging.getLogger(__name__)
-WaitResult = namedtuple('WaitResult', ['active', 'done', 'failed', 'cancelled'])
+WaitResult = namedtuple("WaitResult", ["active", "done", "failed", "cancelled"])
 
 
-class MPIFuture():
+class MPIFuture:
     """MPIFuture is a class meant to track a pending evaluation.
     It record whether it was posted to a worker, the associated
     MPI request, the tag, and the command that was sent."""
@@ -28,7 +28,7 @@ def posted(self):
     def post(self, comm, worker, tag):
         """Posts the request to a particular worker,
         with a particular tag."""
-        if(self.posted):
+        if self.posted:
             raise ValueError("Request already posted")
         comm.send(self._cmd, dest=worker, tag=tag)
         self._worker = worker
@@ -57,7 +57,7 @@ def _set_result(self, value):
     def test(self):
         """Tests if the request has completed."""
         completed, result = MPI.Request.test(self._request)
-        if(completed):
+        if completed:
             self._set_result(result)
         return completed
 
@@ -66,7 +66,7 @@ def waitany(futures):
         """Waits for any of the provided futures to complete
         and sets the result of the one that completed."""
         status = MPI.Status()
-        requests = [ f._request for f in futures]
+        requests = [f._request for f in futures]
         idx, result = MPI.Request.waitany(requests, status=status)
         f = futures[idx]
         f._set_result(result)
@@ -76,10 +76,11 @@ def waitany(futures):
     def waitall(futures):
         """Waits for all the provided futures to complete and
         sets their result."""
-        results = MPI.Request.waitall([ f._request for f in futures ])
+        results = MPI.Request.waitall([f._request for f in futures])
         for r, f in zip(results, futures):
             f._set_result(r)
 
+
 class MPIWorkerPool(Evaluator):
     """Evaluator using a pool of MPI workers.
 
@@ -91,21 +92,33 @@ class MPIWorkerPool(Evaluator):
                           If ``None``, then cache_key defaults to a lossless (identity)
                           encoding of the input dict.
     """
-    def __init__(self, run_function, cache_key=None, comm=None, **kwargs):
+
+    def __init__(
+        self,
+        run_function,
+        cache_key=None,
+        comm=None,
+        num_nodes_master=1,
+        num_nodes_per_eval=1,
+        num_ranks_per_node=1,
+        num_evals_per_node=1,
+        num_threads_per_rank=64,
+        **kwargs
+    ):
         """Constructor."""
         super().__init__(run_function, cache_key)
-        if(comm is None):
+        if comm is None:
             self.comm = MPI.COMM_WORLD
         else:
             self.comm = comm
-        self.num_workers = self.comm.Get_size()-1
+        self.num_workers = self.comm.Get_size() - 1
         self.avail_workers = []
-        for tag in range(0, self.WORKERS_PER_NODE):
+        for tag in range(0, num_ranks_per_node):
             for rank in range(0, self.num_workers):
-                self.avail_workers.append((rank+1, tag+1))
+                self.avail_workers.append((rank + 1, tag + 1))
         funcName = self._run_function.__name__
         moduleName = self._run_function.__module__
-        self.appName = '.'.join((moduleName, funcName))
+        self.appName = ".".join((moduleName, funcName))
 
     def _try_posting(self, unposted):
         """This function takes a list of MPIFuture instances that aren't
@@ -115,7 +128,7 @@ def _try_posting(self, unposted):
         now_posted = []
         now_unposted = []
         for f in unposted:
-            if(len(self.avail_workers) > 0):
+            if len(self.avail_workers) > 0:
                 worker, tag = self.avail_workers.pop()
                 f.post(self.comm, worker, tag)
                 now_posted.append(f)
@@ -128,29 +141,29 @@ def _eval_exec(self, x):
         with the provided point x as argument. Returns an instance
         of MPIFuture. If possible, this future will have been posted."""
         assert isinstance(x, dict)
-        cmd = {'cmd': 'exec', 'args': [x] }
+        cmd = {"cmd": "exec", "args": [x]}
         future = MPIFuture(cmd)
-        if(len(self.avail_workers) > 0):
+        if len(self.avail_workers) > 0:
             worker, tag = self.avail_workers.pop()
             future.post(self.comm, worker, tag)
         return future
 
-    def wait(self, futures, timeout=None, return_when='ANY_COMPLETED'):
+    def wait(self, futures, timeout=None, return_when="ANY_COMPLETED"):
         """Waits for a set of futures to complete. If return_when == ANY_COMPLETED,
         this function will return as soon as at least one of the futures has completed.
         Otherwise it will wait for all the futures to have completed."""
         # TODO: for now the timeout is not taken into account and
         # the failed and cancelled lists will always be empty.
-        done, failed, cancelled, active = [],[],[],[]
+        done, failed, cancelled, active = [], [], [], []
         posted = [f for f in futures if f.posted]
         unposted = [f for f in futures if not f.posted]
 
-        if(len(posted) == 0):
+        if len(posted) == 0:
             newly_posted, unposted = self._try_posting(unposted)
             posted.extend(newly_posted)
 
-        if(return_when == 'ALL_COMPLETED'):
-            while(len(posted) > 0 or len(unposted) > 0):
+        if return_when == "ALL_COMPLETED":
+            while len(posted) > 0 or len(unposted) > 0:
                 MPIFuture.waitall(posted)
                 for f in posted:
                     self.avail_workers.append((f.worker, f.tag))
@@ -167,18 +180,18 @@ def wait(self, futures, timeout=None, return_when='ANY_COMPLETED'):
                     one_completed = True
                     done.append(f)
                     # one request completed, try posting a new request
-                    if(len(unposted) > 0):
+                    if len(unposted) > 0:
                         p = unposted.pop(0)
                         p.post(self.comm, worker=f.worker, tag=f.tag)
                         active.append(p)
                     else:
                         self.avail_workers.append((f.worker, f.tag))
                 else:
                     active.append(f)
-            if not one_completed: # we need to call waitany
+            if not one_completed:  # we need to call waitany
                 f = MPIFuture.waitany(posted)
                 done.append(f)
-                if(len(unposted) > 0): 
+                if len(unposted) > 0:
                     p = unposted.pop(0)
                     p.post(self.comm, worker=f.worker, tag=f.tag)
                     active.append(p)
@@ -187,18 +200,13 @@ def wait(self, futures, timeout=None, return_when='ANY_COMPLETED'):
             for f in unposted:
                 active.append(f)
 
-        return WaitResult(
-            active=active,
-            done=done,
-            failed=failed,
-            cancelled=cancelled
-        )
+        return WaitResult(active=active, done=done, failed=failed, cancelled=cancelled)
 
     def shutdown_workers(self):
         """Shuts down all the MPIWorker instances."""
         req = []
         for k in range(1, self.comm.Get_size()):
-            r = self.comm.isend({'cmd': 'exit'}, dest=k, tag=0)
+            r = self.comm.isend({"cmd": "exit"}, dest=k, tag=0)
             req.append(r)
         MPI.Request.waitall(req)
 
 
@@ -7,7 +7,8 @@
 from deephyper.evaluator.evaluate import Evaluator
 
 logger = logging.getLogger(__name__)
-WaitResult = namedtuple('WaitResult', ['active', 'done', 'failed', 'cancelled'])
+WaitResult = namedtuple("WaitResult", ["active", "done", "failed", "cancelled"])
+
 
 class ProcessPoolEvaluator(Evaluator):
     """Evaluator using ProcessPoolExecutor.
@@ -18,27 +19,30 @@ class ProcessPoolEvaluator(Evaluator):
         run_function (func): takes one parameter of type dict and returns a scalar value.
         cache_key (func): takes one parameter of type dict and returns a hashable type, used as the key for caching evaluations. Multiple inputs that map to the same hashable key will only be evaluated once. If ``None``, then cache_key defaults to a lossless (identity) encoding of the input dict.
     """
+
     def __init__(self, run_function, cache_key=None, **kwargs):
         super().__init__(run_function, cache_key)
-        self.num_workers = self.WORKERS_PER_NODE
-        self.executor = ProcessPoolExecutor(
-            max_workers = self.num_workers
+        self.num_workers = 1
+        self.executor = ProcessPoolExecutor(max_workers=self.num_workers)
+        logger.info(
+            f"ProcessPool Evaluator will execute {self._run_function.__name__}() from module {self._run_function.__module__}"
         )
-        logger.info(f"ProcessPool Evaluator will execute {self._run_function.__name__}() from module {self._run_function.__module__}")
 
     def _eval_exec(self, x):
         assert isinstance(x, dict)
         future = self.executor.submit(self._run_function, x)
         return future
 
-    def wait(self, futures, timeout=None, return_when='ANY_COMPLETED'):
-        return_when=return_when.replace('ANY','FIRST')
+    def wait(self, futures, timeout=None, return_when="ANY_COMPLETED"):
+        return_when = return_when.replace("ANY", "FIRST")
         results = _futures_wait(futures, timeout=timeout, return_when=return_when)
-        done, failed, cancelled = [],[],[]
+        done, failed, cancelled = [], [], []
         active = list(results.not_done)
-        if len(active) > 0 and return_when=='ALL_COMPLETED':
-            raise TimeoutError(f'{timeout} sec timeout expired while '
-            f'waiting on {len(futures)} tasks until {return_when}')
+        if len(active) > 0 and return_when == "ALL_COMPLETED":
+            raise TimeoutError(
+                f"{timeout} sec timeout expired while "
+                f"waiting on {len(futures)} tasks until {return_when}"
+            )
         for res in results.done:
             try:
                 res.result(timeout=0)
@@ -51,9 +55,4 @@ def wait(self, futures, timeout=None, return_when='ANY_COMPLETED'):
                 failed.append(res)
             else:
                 done.append(res)
-        return WaitResult(
-            active=active,
-            done=done,
-            failed=failed,
-            cancelled=cancelled
-        )
+        return WaitResult(active=active, done=done, failed=failed, cancelled=cancelled)
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-VERSION = (0, 1, 9)`
	`1`	`+VERSION = (0, 1, 10)`
`2`	`2`
`3`	`3`	`__version__ = ".".join(map(str, VERSION))`
`4`	`4`