
Commit 19e37d0

Selfplay MoE (#223)
* Change exporter into vector<unique_ptr> exporters (one exporter object for each phase)
* Add phase id specifier for export and make sure that only the appropriate phase exports the sample
* Use max_samples_per_iteration() to end generation
* Add check_for_moe(model_dir)
* Add directories for MoE
* Launch the training procedure multiple times for MoE
* Add logging message
* Start adding special cases for MoE
* Add _move_all_files_wrapper() for cleaner code
* Minor comment update
* Fix compile errors
* Add select_nn_index() to RawNetAgent
* Fix condition
* Add TODO, remove unnecessary '/'
* Update compress_dataset
* Update fileNameExport in selfplay.cpp
* Update get_current_model_tar_file to use model 0 for MoE
* Reset generatedSamples in go()
* Add missing /
* Skip game export for 0 samples
* Use phase0 in get_number_generated_files()
* Fix export of phases, update get_current_model_tar_file()
* Update get_current_model_tar_file()
* Update prepare_data_for_training()
* Update _move_all_files_wrapper()
* Remove unneeded /
* Update get_current_model_tar_file() with phases
* Update planes_train_dir and planes_val_dir in rl_loop.py
* Simplify code and use reversed() for training
* Add missing / in compress_dataset
* Add _retrieve_end_idx(data) for MoE
* Implement _include_data_from_replay_memory_wrapper() which handles MoE and non-MoE cases
* Implement staged learning v2.0, i.e. first train on full data and then each phase separately
* Make use_moe_staged_learning auto-detect
* Skip "phaseNone" for counting phases
* Fix condition for "phase_idx is None"
* Skip "phaseNone" when loading models
* Fix suffix
* Create model_dir_archive for phaseNone
* Add load checkpoint logging info
* Set q_value_ratio to 0 for RL and add Exception for wdl is True conflict
* Use middle phase for validation in staged learning on full data
1 parent 025793a · commit 19e37d0

11 files changed: +309 additions, -64 deletions


DeepCrazyhouse/configs/train_config.py

Lines changed: 2 additions & 2 deletions
@@ -71,7 +71,7 @@ class TrainConfig:
     info_model_type: str = "model_type defines the Model type that used during training (e.g. resnet, vit, risev2," \
                            " risev3, alphavile, alphavile-tiny, alphavile-small, alphavile-normal, alphavile-large," \
                            " NextViT)"
-    model_type: str = "resnet"
+    model_type: str = "risev3"

     info_k_steps_initial: str = "k_steps_initial defines how many steps have been trained before (k_steps_initial != 0 if" \
                                 " you continue training from a checkpoint)" \
@@ -197,7 +197,7 @@ def rl_train_config():
     tc.wdl_loss_factor = 0.499 if tc.use_plys_to_end else 0.5

     tc.nb_training_epochs = 1 # define how many epochs the network will be trained
-    tc.q_value_ratio = 0.15
+    tc.q_value_ratio = 0 # previously 0.15
     tc.sparse_policy_label = False

     return tc
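
For context, the sketch below illustrates what this ratio controls, assuming the usual reading of q_value_ratio as the mixing weight between the self-play game result and the MCTS Q-value when forming value targets; the function name, array names and formula are illustrative, not repository code. Setting it to 0 for RL keeps pure game outcomes, which the new use_wdl consistency check in trainer_agent_pytorch.py (next file) also requires.

# Illustrative sketch only (assumed names and blending formula):
import numpy as np

def blend_value_targets(z: np.ndarray, q: np.ndarray, q_value_ratio: float) -> np.ndarray:
    # mix the final game result z with the search Q-value q
    return (1 - q_value_ratio) * z + q_value_ratio * q

z = np.array([1.0, -1.0, 0.0])   # game results from self-play
q = np.array([0.6, -0.2, 0.1])   # corresponding MCTS Q-values
assert np.allclose(blend_value_targets(z, q, 0.0), z)                       # new RL setting
assert np.allclose(blend_value_targets(z, q, 0.15), 0.85 * z + 0.15 * q)    # previous setting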

DeepCrazyhouse/src/training/trainer_agent_pytorch.py

Lines changed: 3 additions & 0 deletions
@@ -58,6 +58,9 @@ def __init__(
         """
         self.additional_loaders = additional_loaders
         self.tc = train_config
+        if self.tc.use_wdl and self.tc.q_value_ratio != 0:
+            raise Exception("q_value_ratio must be 0 for use_wdl = True.")
+
         self.to = train_objects
         if self.to.metrics is None:
             self.to.metrics = {}

engine/src/agents/rawnetagent.cpp

Lines changed: 8 additions & 1 deletion
@@ -35,6 +35,13 @@ RawNetAgent::RawNetAgent(const vector<unique_ptr<NeuralNetAPI>>& nets, const Pla
 {
 }

+size_t RawNetAgent::select_nn_index() {
+    if (nets.size() == 1) {
+        return 0;
+    }
+    return phaseToNetsIndex.at(state->get_phase(numPhases, searchSettings->gamePhaseDefinition));
+}
+
 void RawNetAgent::evaluate_board_state()
 {
     evalInfo->legalMoves = state->legal_actions();
@@ -55,7 +62,7 @@ void RawNetAgent::evaluate_board_state()
         return;
     }
     state->get_state_planes(true, inputPlanes, nets.front()->get_version());
-    nets[phaseToNetsIndex.at(state->get_phase(numPhases, searchSettings->gamePhaseDefinition))]->predict(inputPlanes, valueOutputs, probOutputs, auxiliaryOutputs);
+    nets[select_nn_index()]->predict(inputPlanes, valueOutputs, probOutputs, auxiliaryOutputs);
     state->set_auxiliary_outputs(auxiliaryOutputs);

     evalInfo->policyProbSmall.resize(evalInfo->legalMoves.size());
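
A rough Python paraphrase of the new dispatch, for illustration only; the C++ members phaseToNetsIndex, numPhases and get_phase() are modeled here with stand-in names.

# Hedged sketch: how select_nn_index() routes an evaluation to the phase expert.
def select_nn_index(nets, phase_to_nets_index, current_phase):
    if len(nets) == 1:          # single network: no MoE, always index 0
        return 0
    return phase_to_nets_index[current_phase]

nets = ["net_phase0", "net_phase1", "net_phase2"]
phase_to_nets_index = {0: 0, 1: 1, 2: 2}
assert select_nn_index(nets, phase_to_nets_index, current_phase=2) == 2
assert select_nn_index(nets[:1], phase_to_nets_index, current_phase=2) == 0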

engine/src/agents/rawnetagent.h

Lines changed: 8 additions & 0 deletions
@@ -41,6 +41,14 @@ using namespace crazyara;

 class RawNetAgent: public Agent
 {
+private:
+    /**
+     * @brief select_nn_index Returns the index according to phaseToNetsIndex.
+     * If no phases is enabled, 0 will be returned.
+     * @return phaseToNetsIndex index or 0
+     */
+    size_t select_nn_index();
+
 public:
     const SearchSettings* searchSettings;

engine/src/rl/fileio.py

Lines changed: 181 additions & 34 deletions
@@ -65,6 +65,25 @@ def compress_zarr_dataset(data, file_path, compression='lz4', clevel=5, start_id
     return nan_detected


+def check_for_moe(model_dir: str):
+    """
+    Extracts the number of phases from the given model directory.
+    Returns true if mixture of experts is used.
+    The second return argument is the number of phases.
+    :param model_dir: Model directory, where either the model directly is stored or the number of phase directories.
+    :return: is_moe: bool, number_phases: int or None
+    """
+    number_phases = 0
+    is_moe = False
+    for entry in os.listdir(model_dir):
+        if entry.startswith("phase") and "None" not in entry:
+            number_phases += 1
+            is_moe = True
+    if not is_moe:
+        number_phases = None
+    return is_moe, number_phases
+
+
 class FileIO:
     """
     Class to facilitate creation of directories, reading of file
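
A small usage sketch for check_for_moe(); the import path and the temporary directory layout are assumptions for illustration.

import os
import tempfile

from fileio import check_for_moe  # assumes fileio.py (engine/src/rl/) is on the path

# MoE layout: one sub-directory per phase; "phaseNone" (staged learning) is not counted.
with tempfile.TemporaryDirectory() as model_dir:
    for name in ("phase0", "phase1", "phase2", "phaseNone"):
        os.mkdir(os.path.join(model_dir, name))
    assert check_for_moe(model_dir) == (True, 3)

# Flat layout without phase directories: no MoE, number of phases is None.
with tempfile.TemporaryDirectory() as model_dir:
    assert check_for_moe(model_dir) == (False, None)
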
@@ -100,6 +119,17 @@ def __init__(self, orig_binary_name: str, binary_dir: str, uci_variant: str):

         self.timestamp_format = "%Y-%m-%d-%H-%M-%S"

+        self.is_moe, self.number_phases = check_for_moe(self.model_dir)
+
+        # Whether to use Staged learning v2.0 for MoE training,
+        # i.e. first train on full data and then each phase separately
+        self.use_moe_staged_learning = True if os.path.isdir(self.model_dir + "phaseNone") else False
+
+        if self.is_moe:
+            logging.info(f"Mixture of experts detected with {self.number_phases} phases.")
+            logging.info(f"Use MoE staged learning is {self.use_moe_staged_learning}.")
+        else:
+            logging.info("No mixture of experts detected.")
         self._create_directories()

         # Adjust paths in main_config
@@ -124,14 +154,42 @@ def _create_directories(self):
         create_dir(self.model_dir_archive)
         create_dir(self.logs_dir_archive)

-    def _include_data_from_replay_memory(self, nb_files: int, fraction_for_selection: float):
+        if self.is_moe:
+            for directory in [self.export_dir_gen_data, self.train_dir, self.val_dir, self.train_dir_archive,
+                              self.val_dir_archive, self.model_contender_dir, self.model_dir_archive]:
+                for phase_idx in range(self.number_phases):
+                    create_dir(directory + f"phase{phase_idx}")
+            if self.use_moe_staged_learning:
+                create_dir(self.model_contender_dir + "phaseNone")
+                create_dir(self.model_dir_archive + "phaseNone")
+
+    def _include_data_from_replay_memory_wrapper(self, nb_files: int, fraction_for_selection: float):
         """
+        Wrapper for _include_data_from_replay_memory() which handles MoE and non MoE cases.
+        :param nb_files: Number of files to include from replay memory into training
+        :param fraction_for_selection: Proportion for selecting files from the replay memory
+        """
+
+        if not self.is_moe:
+            self._include_data_from_replay_memory(self.train_dir_archive, self.train_dir, nb_files,
+                                                  fraction_for_selection)
+        else:
+            for phase_idx in range(self.number_phases):
+                self._include_data_from_replay_memory(self.train_dir_archive + f"phase{phase_idx}/",
+                                                      self.train_dir + f"phase{phase_idx}/", nb_files,
+                                                      fraction_for_selection)
+
+    def _include_data_from_replay_memory(self, from_dir: str, to_dir: str, nb_files: int, fraction_for_selection: float):
+        """
+        Moves data from the from_dir directory to the to_dir directory.
+        :param from_dir: Usually train_dir_archive
+        :param to_dir: Usually train_dir
         :param nb_files: Number of files to include from replay memory into training
         :param fraction_for_selection: Proportion for selecting files from the replay memory
         :return:
         """
         # get all file/folder names ignoring hidden files
-        folder_names = [folder_name for folder_name in os.listdir(self.train_dir_archive)
+        folder_names = [folder_name for folder_name in os.listdir(from_dir)
                         if not folder_name.startswith('.')]

         if len(folder_names) < nb_files:
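
Reproduced as a stand-alone sketch, the per-phase layout that the _create_directories() additions build; the base path and phase count are illustrative, the directory names follow the FileIO attributes in the diff above.

import os
import tempfile

number_phases = 3
base = tempfile.mkdtemp()
per_phase = ["export_dir_gen_data", "train_dir", "val_dir", "train_dir_archive",
             "val_dir_archive", "model_contender_dir", "model_dir_archive"]
for directory in per_phase:
    for phase_idx in range(number_phases):
        os.makedirs(os.path.join(base, directory, f"phase{phase_idx}"))
# staged learning v2.0 additionally keeps a "phaseNone" slot for the full-data model
for directory in ("model_contender_dir", "model_dir_archive"):
    os.makedirs(os.path.join(base, directory, "phaseNone"))

print(sorted(os.listdir(os.path.join(base, "model_contender_dir"))))
# ['phase0', 'phase1', 'phase2', 'phaseNone']
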
@@ -153,90 +211,160 @@ def _include_data_from_replay_memory(self, nb_files: int, fraction_for_selection

         # move selected files into train dir
         for index in list(indices):
-            os.rename(self.train_dir_archive + folder_names[index], self.train_dir + folder_names[index])
+            os.rename(from_dir + folder_names[index], to_dir + folder_names[index])

     def _move_generated_data_to_train_val(self):
         """
         Moves the generated samples, games (pgn format) and the number how many games have been generated to the given
         training and validation directory
         :return:
         """
-        file_names = os.listdir(self.export_dir_gen_data)
+        if not self.is_moe:
+            file_names = os.listdir(self.export_dir_gen_data)
+
+            # move the last file into the validation directory
+            os.rename(self.export_dir_gen_data + file_names[-1], self.val_dir + file_names[-1])
+
+            # move the rest into the training directory
+            for file_name in file_names[:-1]:
+                os.rename(self.export_dir_gen_data + file_name, self.train_dir + file_name)
+        else:
+            for phase_idx in range(self.number_phases):
+                file_names = os.listdir(self.export_dir_gen_data + f"/phase{phase_idx}")

-        # move the last file into the validation directory
-        os.rename(self.export_dir_gen_data + file_names[-1], self.val_dir + file_names[-1])
+                # move the last file into the validation directory
+                os.rename(self.export_dir_gen_data + f"/phase{phase_idx}/" + file_names[-1],
+                          self.val_dir + f"/phase{phase_idx}/" + file_names[-1])

-        # move the rest into the training directory
-        for file_name in file_names[:-1]:
-            os.rename(self.export_dir_gen_data + file_name, self.train_dir + file_name)
+                # move the rest into the training directory
+                for file_name in file_names[:-1]:
+                    os.rename(self.export_dir_gen_data + f"/phase{phase_idx}/" + file_name,
+                              self.train_dir + f"/phase{phase_idx}/" + file_name)

     def _move_train_val_data_into_archive(self):
         """
         Moves files from training, validation dir into archive directory
         :return:
         """
-        move_all_files(self.train_dir, self.train_dir_archive)
-        move_all_files(self.val_dir, self.val_dir_archive)
+        self._move_all_files_wrapper(self.train_dir, self.train_dir_archive)
+        self._move_all_files_wrapper(self.val_dir, self.val_dir_archive)

     def _remove_files_in_weight_dir(self):
         """
         Removes all files in the weight directory.
         :return:
         """
-        file_list = glob.glob(os.path.join(self.weight_dir, "model-*"))
-        for file in file_list:
-            os.remove(file)
+        if not self.is_moe:
+            file_list = glob.glob(os.path.join(self.weight_dir, "model-*"))
+            for file in file_list:
+                os.remove(file)
+        else:
+            for phase_idx in range(self.number_phases):
+                file_list = glob.glob(os.path.join(self.weight_dir, f"phase{phase_idx}/model-*"))
+                for file in file_list:
+                    os.remove(file)

-    def compress_dataset(self, device_name: str):
+    def _compress_single_dataset(self, phase: str, device_name: str):
         """
-        Loads the uncompressed data file, selects all sample until the index specified in "startIdx.txt",
-        compresses it and exports it.
-        :param device_name: The currently active device name (context_device-id)
-        :return:
+        Loads a single uncompressed data file, selects all samples, compresses it and exports it.
+        :param phase: Phase to use, e.g. "phase0/", "phase1". Is empty string for no phase ("").
+        :return: export_dir: str
         """
-        data = zarr.load(self.binary_dir + "data_" + device_name + ".zarr")
+        data = zarr.load(self.binary_dir + phase + "data_" + device_name + ".zarr")

-        export_dir, time_stamp = self.create_export_dir(device_name)
+        export_dir, time_stamp = self.create_export_dir(phase, device_name)
         zarr_path = export_dir + time_stamp + ".zip"
-        nan_detected = compress_zarr_dataset(data, zarr_path, start_idx=0)
+
+        end_idx = self._retrieve_end_idx(data)
+
+        nan_detected = compress_zarr_dataset(data, zarr_path, start_idx=0, end_idx=end_idx)
         if nan_detected is True:
             logging.error("NaN value detected in file %s.zip" % time_stamp)
             new_export_dir = self.binary_dir + time_stamp
             os.rename(export_dir, new_export_dir)
             export_dir = new_export_dir
-        self.move_game_data_to_export_dir(export_dir, device_name)

-    def create_export_dir(self, device_name: str) -> (str, str):
+        return export_dir
+
+    def _retrieve_end_idx(self, data):
+        """
+        Checks the y_policy sum in the data for is_moe is False and
+        returns the first occurence of only 0s.
+        An end_idx of 0 means the whole dataset will be used
+        :param data: Zarr data object
+        :return: end_idx
+        """
+        if self.is_moe is False:
+            return 0
+
+        sum_y_policy = data['y_policy'].sum(axis=1)
+        potential_end_idx = sum_y_policy.argmin()
+        if sum_y_policy[potential_end_idx] == 0:
+            return potential_end_idx
+        return 0
+
+    def compress_dataset(self, device_name: str):
+        """
+        Calls _compress_single_dataset() for each phase or a single time for no phases.
+        Also moves the game data to export directory.
+        :param device_name: The currently active device name (context_device-id)
+        :return:
+        """
+        if self.is_moe:
+            for phase_idx in range(self.number_phases):
+                export_dir = self._compress_single_dataset(f"phase{phase_idx}/", device_name)
+                if phase_idx == 0:
+                    self.move_game_data_to_export_dir(export_dir, device_name)
+        else:
+            export_dir = self._compress_single_dataset("", device_name)
+            self.move_game_data_to_export_dir(export_dir, device_name)
+
+    def create_export_dir(self, phase: str, device_name: str) -> (str, str):
         """
         Create a directory in the 'export_dir_gen_data' path,
         where the name consists of the current date, time and device ID.
+        :param phase: Phase to use, e.g. "phase0/", "phase1". Is empty string for no phase ("").
         :param device_name: The currently active device name (context_device-id)
         :return: Path of the created directory; Time stamp used while creating
         """
         # include current timestamp in dataset export file
         time_stamp = datetime.datetime.fromtimestamp(time.time()).strftime(self.timestamp_format)
-        time_stamp_dir = f'{self.export_dir_gen_data}{time_stamp}-{device_name}/'
+        time_stamp_dir = f'{self.export_dir_gen_data}{phase}{time_stamp}-{device_name}/'
         # create a directory of the current time_stamp
         if not os.path.exists(time_stamp_dir):
             os.makedirs(time_stamp_dir)

         return time_stamp_dir, time_stamp

-    def get_current_model_tar_file(self) -> str:
+    def get_current_model_tar_file(self, phase=None, base_dir=None) -> str:
         """
+        :param phase: Phase to use. Should be "" if no MoE is used and otherwise e.g. "phase2".
+        :param base_dir: Should be self.model_dir in the normal case
+        For None default "phase0" or "" will be used.
         Return the filename of the current active model weight (.tar) file for pytorch
         """
-        model_params = glob.glob(self.model_dir + "/*.tar")
+        if phase is None:
+            if self.is_moe:
+                phase = "phase0"
+            else:
+                phase = ""
+        if base_dir is None:
+            base_dir = self.model_dir
+        model_params = glob.glob(base_dir + phase + "/*.tar")
         if len(model_params) == 0:
             raise FileNotFoundError(f'No model file found in {self.model_dir}')
         return model_params[0]

     def get_number_generated_files(self) -> int:
         """
         Returns the amount of file that have been generated since the last training run.
-        :return:
+        :return: nb_training_files: int
         """
-        return len(glob.glob(self.export_dir_gen_data + "**/*.zip"))
+        if self.is_moe:
+            phase = "phase0/"
+        else:
+            phase = ""
+        return len(glob.glob(self.export_dir_gen_data + phase + "**/*.zip"))

     def move_game_data_to_export_dir(self, export_dir: str, device_name: str):
         """
@@ -271,12 +399,12 @@ def prepare_data_for_training(self, rm_nb_files: int, rm_fraction_for_selection:
         if did_contender_win:
             self._move_train_val_data_into_archive()
             # move last contender into archive
-            move_all_files(self.model_contender_dir, self.model_dir_archive)
+            self._move_all_files_wrapper(self.model_contender_dir, self.model_dir_archive)

         self._move_generated_data_to_train_val()
         # We don’t need them anymore; the last model from last training has already been saved
         self._remove_files_in_weight_dir()
-        self._include_data_from_replay_memory(rm_nb_files, rm_fraction_for_selection)
+        self._include_data_from_replay_memory_wrapper(rm_nb_files, rm_fraction_for_selection)

     def remove_intermediate_weight_files(self):
         """
@@ -288,10 +416,29 @@ def remove_intermediate_weight_files(self):
         for f in files:
             os.remove(f)

+    def _move_all_files_wrapper(self, from_dir, to_dir):
+        """
+        Wrapper function for move_all_files(from_dir, to_dir).
+        In case of self.is_moe, all phases directories are moved as well.
+        :param from_dir: Origin directory where the files are located
+        :param to_dir: Destination directory where all files (including subdirectories directories) will be moved
+        :return:
+        """
+        if not self.is_moe:
+            move_all_files(from_dir, to_dir)
+        else:
+            for phase_idx in range(self.number_phases):
+                move_all_files(from_dir + f"phase{phase_idx}/", to_dir + f"phase{phase_idx}/")
+
+            if self.use_moe_staged_learning:
+                from_dir_final = from_dir + "phaseNone/"
+                to_dir_final = to_dir + "phaseNone/"
+                if os.path.isdir(from_dir_final) and os.path.isdir(to_dir_final):
+                    move_all_files(from_dir_final, to_dir_final)
+
     def replace_current_model_with_contender(self):
         """
         Moves the previous model into archive directory and the model-contender into the model directory
         """
-        move_all_files(self.model_dir, self.model_dir_archive)
-        move_all_files(self.model_contender_dir, self.model_dir)
-
+        self._move_all_files_wrapper(self.model_dir, self.model_dir_archive)
+        self._move_all_files_wrapper(self.model_contender_dir, self.model_dir)
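
Finally, a path-only sketch of how the updated get_current_model_tar_file() resolves its glob pattern after this change; no filesystem access, and the model_dir value is illustrative.

def resolve_tar_pattern(model_dir: str, is_moe: bool, phase=None, base_dir=None) -> str:
    # mirrors the phase/base_dir defaulting added above
    if phase is None:
        phase = "phase0" if is_moe else ""
    if base_dir is None:
        base_dir = model_dir
    return base_dir + phase + "/*.tar"

model_dir = "model/crazyhouse/"                                       # illustrative path
print(resolve_tar_pattern(model_dir, is_moe=False))                   # model/crazyhouse//*.tar
print(resolve_tar_pattern(model_dir, is_moe=True))                    # model/crazyhouse/phase0/*.tar
print(resolve_tar_pattern(model_dir, is_moe=True, phase="phase2"))    # model/crazyhouse/phase2/*.tar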
