diff --git a/pkg/metricscollector/v1beta1/tfevent-metricscollector/tfevent_loader.py b/pkg/metricscollector/v1beta1/tfevent-metricscollector/tfevent_loader.py index f41597f9237..0422af2d2bb 100644 --- a/pkg/metricscollector/v1beta1/tfevent-metricscollector/tfevent_loader.py +++ b/pkg/metricscollector/v1beta1/tfevent-metricscollector/tfevent_loader.py @@ -30,11 +30,23 @@ import rfc3339 import tensorflow as tf from tensorboard.backend.event_processing.event_accumulator import EventAccumulator -from tensorboard.backend.event_processing.tag_types import TENSORS +from tensorboard.backend.event_processing.tag_types import SCALARS, TENSORS from pkg.metricscollector.v1beta1.common import const +def _should_consider(tag: str, metric_name: str, tfefile: str) -> bool: + tfefile_parent_dir = ( + os.path.dirname(metric_name) + if len(metric_name.split("/")) >= 2 + else os.path.dirname(tfefile) + ) + basedir_name = os.path.dirname(tfefile) + return tag.startswith(metric_name.split("/")[-1]) and basedir_name.endswith( + tfefile_parent_dir + ) + + class TFEventFileParser: def __init__(self, metric_names): self.metric_names = metric_names @@ -47,21 +59,15 @@ def find_all_files(directory): def parse_summary(self, tfefile): metric_logs = [] - event_accumulator = EventAccumulator(tfefile, size_guidance={TENSORS: 0}) + event_accumulator = EventAccumulator( + tfefile, size_guidance={SCALARS: 0, TENSORS: 0} + ) event_accumulator.Reload() - for tag in event_accumulator.Tags()[TENSORS]: + tags = event_accumulator.Tags() + for tag in tags[TENSORS]: for m in self.metric_names: - tfefile_parent_dir = ( - os.path.dirname(m) - if len(m.split("/")) >= 2 - else os.path.dirname(tfefile) - ) - basedir_name = os.path.dirname(tfefile) - if not tag.startswith(m.split("/")[-1]) or not basedir_name.endswith( - tfefile_parent_dir - ): + if not _should_consider(tag, m, tfefile): continue - for tensor in event_accumulator.Tensors(tag): ml = api_pb2.MetricLog( time_stamp=rfc3339.rfc3339( @@ -72,7 +78,19 @@ def parse_summary(self, tfefile): ), ) metric_logs.append(ml) - + # support old-style tensorboard metrics too + for tag in tags[SCALARS]: + for m in self.metric_names: + if not _should_consider(tag, m, tfefile): + continue + for scalar in event_accumulator.Scalars(tag): + ml = api_pb2.MetricLog( + time_stamp=rfc3339.rfc3339( + datetime.fromtimestamp(scalar.wall_time) + ), + metric=api_pb2.Metric(name=m, value=str(scalar.value)), + ) + metric_logs.append(ml) return metric_logs