forked from NVIDIA/NVFlare
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2663882
commit a979853
Showing
18 changed files
with
508 additions
and
180 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import copy | ||
import time | ||
|
||
import nvflare.client as flare | ||
from nvflare.app_common.metrics_exchange.metrics_exchanger import IPCMetricsExchanger | ||
|
||
|
||
def train(input_arr, current_round, epochs=3):
    """Mock a local training run and stream a per-batch metric.

    Args:
        input_arr: the received weights; a deep copy is modified, the
            argument itself is left untouched.
        current_round: index of the current round, used to offset the
            global step so that steps keep increasing across rounds.
        epochs: number of mock epochs to run.

    Returns:
        The copied weights after 1 has been added once per epoch.
    """
    exchanger = IPCMetricsExchanger()
    trained = copy.deepcopy(input_arr)

    total_samples = 2000
    samples_per_batch = 16
    batches_per_epoch = total_samples // samples_per_batch

    for epoch in range(epochs):
        # first global step of this epoch within the whole job
        epoch_start = current_round * batches_per_epoch * epochs + epoch * batches_per_epoch
        for batch in range(batches_per_epoch):
            step = epoch_start + batch
            # the mocked "loss" is simply the global step itself
            exchanger.log(
                key="loss_for_each_batch",
                value=step,
                data_type=flare.AnalyticsDataType.SCALAR,
                global_step=step,
            )
        # mock training with plus 1
        trained += 1
        # assume each epoch takes 1 seconds
        time.sleep(1.0)

    return trained
|
||
|
||
def evaluate(input_arr):
    """Return a mocked evaluation metric; ``input_arr`` is not inspected."""
    mock_metric = 100
    return mock_metric
|
||
|
||
def main():
    """Run the client loop: receive a model, mock-train it, send it back."""
    # initializes NVFlare interface
    flare.init()

    # get system information
    print(f"system info is: {flare.system_info()}")

    while flare.is_running():
        received = flare.receive()
        print(f"received weights is: {received.params}")

        weights = received.params["numpy_key"]
        round_number = received.current_round

        # training
        trained_weights = train(weights, current_round=round_number, epochs=3)

        # evaluation (performed on the weights that were received)
        accuracy = evaluate(weights)

        print(f"system info is: {flare.system_info()}")
        print(f"finish round: {round_number}")

        # send back the model
        print(f"send back: {trained_weights}")
        reply = flare.FLModel(
            params={"numpy_key": trained_weights},
            params_type="FULL",
            metrics={"accuracy": accuracy},
            current_round=round_number,
        )
        flare.send(reply)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
44 changes: 44 additions & 0 deletions
44
nvflare/app_common/metrics_exchange/client_api_metric_receiver.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import os | ||
|
||
from nvflare.apis.event_type import EventType | ||
from nvflare.apis.fl_context import FLContext | ||
from nvflare.app_common.data_exchange.piper import Piper | ||
from nvflare.client.config import ClientConfig, ConfigKey | ||
from nvflare.client.constants import CONFIG_METRICS_EXCHANGE | ||
from .metric_receiver import MetricReceiver | ||
|
||
|
||
class ClientAPIMetricReceiver(MetricReceiver):
    """Metric receiver that also writes the pipe settings to a config file.

    On START_RUN, after the base class has set up the pipe, the pipe's
    channel name, class and arguments are written to the job's config
    folder together with the site name and job id.
    """

    def handle_event(self, event_type: str, fl_ctx: FLContext):
        """Delegate every event to the base class, then export config on START_RUN.

        Args:
            event_type: the event being fired.
            fl_ctx: the FL context of the event.
        """
        # Always delegate: the base class stops the pipe handler on END_RUN,
        # so filtering events here would leave the handler running.
        super().handle_event(event_type, fl_ctx)

        if event_type != EventType.START_RUN:
            return

        if self.pipe is None:
            # The base class could not resolve the pipe component (it has
            # already logged the error), so there is nothing to export.
            return

        self.prepare_external_config(fl_ctx)

    def prepare_external_config(self, fl_ctx: FLContext):
        """Write the metrics-exchange config file into the job's config folder.

        Args:
            fl_ctx: the FL context, used to locate the workspace and to read
                the site name and job id.
        """
        workspace = fl_ctx.get_engine().get_workspace()
        app_dir = workspace.get_app_dir(fl_ctx.get_job_id())
        config_file = os.path.join(app_dir, workspace.config_folder, CONFIG_METRICS_EXCHANGE)

        # prepare config exchange for data exchanger
        client_config = ClientConfig()
        config_dict = client_config.config
        config_dict[ConfigKey.PIPE_CHANNEL_NAME] = self.pipe_channel_name
        config_dict[ConfigKey.PIPE_CLASS] = Piper.get_external_pipe_class(self.pipe, fl_ctx)
        config_dict[ConfigKey.PIPE_ARGS] = Piper.get_external_pipe_args(self.pipe, fl_ctx)
        config_dict[ConfigKey.SITE_NAME] = fl_ctx.get_identity_name()
        config_dict[ConfigKey.JOB_ID] = fl_ctx.get_job_id()
        client_config.to_json(config_file)
88 changes: 88 additions & 0 deletions
88
nvflare/app_common/metrics_exchange/memory_metrics_retriever.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from queue import Queue | ||
|
||
from nvflare.apis.event_type import EventType | ||
from nvflare.apis.fl_context import FLContext | ||
from nvflare.app_common.metrics_exchange.metrics_exchanger import MemoryMetricsExchanger | ||
from nvflare.app_common.tracking.tracker_types import LogWriterName | ||
from nvflare.app_common.widgets.streaming import ANALYTIC_EVENT_TYPE | ||
from nvflare.fuel.utils.constants import Mode | ||
from nvflare.fuel.utils.pipe.memory_pipe import MemoryPipe | ||
from nvflare.fuel.utils.pipe.pipe_handler import PipeHandler | ||
|
||
from .metric_receiver import MetricReceiver | ||
|
||
|
||
class MemoryMetricReceiver(MetricReceiver):
    """Metric receiver that uses a pair of in-memory queues as its pipe.

    The receiver owns both ends of the pipe: a PASSIVE ``MemoryPipe`` that it
    reads from, and an ACTIVE ``MemoryPipe`` wrapped in a
    ``MemoryMetricsExchanger`` that is registered in the engine's component
    table under ``metrics_exchanger_id``.
    """

    def __init__(
        self,
        metrics_exchanger_id: str,
        event_type=ANALYTIC_EVENT_TYPE,
        writer_name=LogWriterName.TORCH_TB,
        topic: str = "metrics",
        get_poll_interval: float = 0.5,
        read_interval: float = 0.1,
        heartbeat_interval: float = 5.0,
        heartbeat_timeout: float = 30.0,
    ):
        """Metrics receiver with memory pipe.

        Args:
            metrics_exchanger_id (str): component id under which the created
                metrics exchanger is registered in the engine.
            event_type (str): event type to fire (defaults to ANALYTIC_EVENT_TYPE).
            writer_name: the log writer for syntax information
                (defaults to LogWriterName.TORCH_TB).
            topic (str): topic name; stored on the instance only.
            get_poll_interval (float): stored on the instance only.
            read_interval (float): read interval given to the pipe handlers.
            heartbeat_interval (float): heartbeat interval given to the pipe handlers.
            heartbeat_timeout (float): heartbeat timeout given to the pipe handlers.
        """
        # The base constructor accepts only pipe/heartbeat settings. It also
        # requires a pipe_id, which is never resolved here because START_RUN
        # is handled by this class with its own MemoryPipe.
        super().__init__(
            pipe_id=metrics_exchanger_id,
            read_interval=read_interval,
            heartbeat_interval=heartbeat_interval,
            heartbeat_timeout=heartbeat_timeout,
        )
        self.metrics_exchanger_id = metrics_exchanger_id

        # NOTE(review): the base class shown in this change does not consume
        # these settings; they are kept so existing configs still construct.
        self._event_type = event_type
        self._writer_name = writer_name
        self._topic = topic
        self._get_poll_interval = get_poll_interval

        self.x_queue = Queue()
        self.y_queue = Queue()

    def _init_pipe(self, fl_ctx: FLContext) -> None:
        """Create the receiving (PASSIVE) end of the in-memory pipe."""
        self.pipe = MemoryPipe(x_queue=self.x_queue, y_queue=self.y_queue, mode=Mode.PASSIVE)

    def _create_metrics_exchanger(self):
        """Create the sending (ACTIVE) end of the pipe wrapped in a metrics exchanger.

        Returns:
            A ``MemoryMetricsExchanger`` whose pipe handler is already started.
        """
        pipe = MemoryPipe(x_queue=self.x_queue, y_queue=self.y_queue, mode=Mode.ACTIVE)
        pipe.open(name=self.pipe_channel_name)
        # init pipe handler
        pipe_handler = PipeHandler(
            pipe,
            read_interval=self.read_interval,
            heartbeat_interval=self.heartbeat_interval,
            heartbeat_timeout=self.heartbeat_timeout,
        )
        pipe_handler.start()
        return MemoryMetricsExchanger(pipe_handler=pipe_handler)

    def handle_event(self, event_type: str, fl_ctx: FLContext):
        """Set up both pipe ends at run start; defer other events to the base class.

        Args:
            event_type: the event being fired.
            fl_ctx: the FL context of the event.
        """
        if event_type == EventType.ABOUT_TO_START_RUN:
            engine = fl_ctx.get_engine()
            # inserts MetricsExchanger into engine components
            metrics_exchanger = self._create_metrics_exchanger()
            all_components = engine.get_all_components()
            all_components[self.metrics_exchanger_id] = metrics_exchanger
        elif event_type == EventType.START_RUN:
            # The base class would look the pipe up in the engine by pipe_id;
            # this receiver builds its own pipe, so wire the handler here.
            self._fl_ctx = fl_ctx
            self._init_pipe(fl_ctx)
            self.pipe_handler = PipeHandler(
                pipe=self.pipe,
                read_interval=self.read_interval,
                heartbeat_interval=self.heartbeat_interval,
                heartbeat_timeout=self.heartbeat_timeout,
            )
            self.pipe_handler.set_status_cb(self._pipe_status_cb)
            self.pipe_handler.set_message_cb(self._pipe_msg_cb)
            self.pipe.open(self.pipe_channel_name)
            self.pipe_handler.start()
        else:
            # END_RUN and everything else: base class stops the pipe handler.
            super().handle_event(event_type, fl_ctx)

    def prepare_external_config(self, fl_ctx: FLContext):
        """No external config is written by this receiver."""
        pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from nvflare.apis.dxo import DXO | ||
from nvflare.apis.event_type import EventType | ||
from nvflare.apis.fl_context import FLContext | ||
from nvflare.fuel.utils.constants import PipeChannelName | ||
from nvflare.fuel.utils.pipe.pipe import Message, Pipe | ||
from nvflare.fuel.utils.pipe.pipe_handler import PipeHandler | ||
from nvflare.apis.fl_component import FLComponent | ||
from nvflare.app_common.tracking.analytic_utils import send_analytic_dxo | ||
|
||
|
||
class MetricReceiver(FLComponent):
    """Receives metric messages from a pipe and forwards them as analytic DXOs."""

    def __init__(
        self,
        pipe_id: str,
        read_interval=0.1,
        heartbeat_interval=5.0,
        heartbeat_timeout=30.0,
        pipe_channel_name=PipeChannelName.METRIC,
    ):
        """
        Args:
            pipe_id: id of the engine component that provides the ``Pipe``.
            read_interval: read interval passed to the ``PipeHandler``.
            heartbeat_interval: heartbeat interval passed to the ``PipeHandler``.
            heartbeat_timeout: heartbeat timeout passed to the ``PipeHandler``.
            pipe_channel_name: name used when opening the pipe.
        """
        super().__init__()
        self.pipe_id = pipe_id
        self.read_interval = read_interval
        self.heartbeat_interval = heartbeat_interval
        self.heartbeat_timeout = heartbeat_timeout
        self.pipe_channel_name = pipe_channel_name
        # populated on START_RUN
        self.pipe = None
        self.pipe_handler = None
        self._fl_ctx = None

    def handle_event(self, event_type: str, fl_ctx: FLContext):
        """Start the pipe handler on START_RUN and stop it on END_RUN.

        Args:
            event_type: the event being fired.
            fl_ctx: the FL context of the event.
        """
        if event_type == EventType.START_RUN:
            engine = fl_ctx.get_engine()
            pipe = engine.get_component(self.pipe_id)
            if not isinstance(pipe, Pipe):
                self.log_error(fl_ctx, f"component {self.pipe_id} must be Pipe but got {type(pipe)}")
                self.system_panic(f"bad component {self.pipe_id}", fl_ctx)
                return
            # kept so the message callback has a context to send with
            self._fl_ctx = fl_ctx
            self.pipe = pipe
            self.pipe_handler = PipeHandler(
                pipe=self.pipe,
                read_interval=self.read_interval,
                heartbeat_interval=self.heartbeat_interval,
                heartbeat_timeout=self.heartbeat_timeout,
            )
            self.pipe_handler.set_status_cb(self._pipe_status_cb)
            self.pipe_handler.set_message_cb(self._pipe_msg_cb)
            self.pipe.open(self.pipe_channel_name)
            self.pipe_handler.start()
        elif event_type == EventType.END_RUN:
            self.log_info(fl_ctx, "Stopping pipe handler")
            if self.pipe_handler:
                self.pipe_handler.notify_end("end_of_job")
                self.pipe_handler.stop()

    def _pipe_status_cb(self, msg: Message):
        """Log a pipe status change and stop the pipe handler."""
        self.logger.info(f"{self.pipe_channel_name} pipe status changed to {msg.topic}")
        self.pipe_handler.stop()

    def _pipe_msg_cb(self, msg: Message):
        """Forward a received metric record; non-DXO payloads are logged and dropped."""
        if not isinstance(msg.data, DXO):
            self.logger.error(f"bad metric data: expect DXO but got {type(msg.data)}")
            # Nothing usable to forward; reading msg.data.data below would fail.
            return
        self.logger.info(f"received metric record: {msg.topic}: {msg.data.data}")
        send_analytic_dxo(self, msg.data, self._fl_ctx)
Oops, something went wrong.