From 56919dfd984d39438382807ef6fc8f9d32d49a99 Mon Sep 17 00:00:00 2001 From: Dmitriy Bogun Date: Fri, 19 Aug 2022 12:28:17 +0300 Subject: [PATCH 01/27] Tool for dump/restore OpenTSDB data --- tools/otsdb-dump-restore/.gitignore | 8 + tools/otsdb-dump-restore/Dockerfile.devel | 19 + tools/otsdb-dump-restore/Dockerfile.prod | 9 + tools/otsdb-dump-restore/Makefile | 62 ++++ tools/otsdb-dump-restore/README.md | 1 + .../build-tools/devel-entry-point.sh | 13 + .../build-tools/make_wheel.sh | 11 + .../build-tools/setup-devel-user.sh | 27 ++ tools/otsdb-dump-restore/requirements-dev.txt | 0 tools/otsdb-dump-restore/requirements.txt | 3 + tools/otsdb-dump-restore/run_devel.sh | 7 + tools/otsdb-dump-restore/setup.py | 47 +++ .../otsdb-dump-restore/src/kilda/__init__.py | 1 + .../src/kilda/tsdb_dump_restore/APP_META.json | 1 + .../src/kilda/tsdb_dump_restore/__init__.py | 0 .../src/kilda/tsdb_dump_restore/dump.py | 346 ++++++++++++++++++ .../src/kilda/tsdb_dump_restore/mapping.py | 42 +++ .../src/kilda/tsdb_dump_restore/report.py | 51 +++ .../src/kilda/tsdb_dump_restore/restore.py | 284 ++++++++++++++ .../kilda/tsdb_dump_restore/stats_client.py | 290 +++++++++++++++ .../src/kilda/tsdb_dump_restore/utils.py | 121 ++++++ 21 files changed, 1343 insertions(+) create mode 100644 tools/otsdb-dump-restore/.gitignore create mode 100644 tools/otsdb-dump-restore/Dockerfile.devel create mode 100644 tools/otsdb-dump-restore/Dockerfile.prod create mode 100644 tools/otsdb-dump-restore/Makefile create mode 100644 tools/otsdb-dump-restore/README.md create mode 100755 tools/otsdb-dump-restore/build-tools/devel-entry-point.sh create mode 100755 tools/otsdb-dump-restore/build-tools/make_wheel.sh create mode 100755 tools/otsdb-dump-restore/build-tools/setup-devel-user.sh create mode 100644 tools/otsdb-dump-restore/requirements-dev.txt create mode 100644 tools/otsdb-dump-restore/requirements.txt create mode 100755 tools/otsdb-dump-restore/run_devel.sh create mode 100644 tools/otsdb-dump-restore/setup.py create mode 100644 tools/otsdb-dump-restore/src/kilda/__init__.py create mode 100644 tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/APP_META.json create mode 100644 tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/__init__.py create mode 100644 tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py create mode 100644 tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/mapping.py create mode 100644 tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/report.py create mode 100644 tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/restore.py create mode 100644 tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/stats_client.py create mode 100644 tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/utils.py diff --git a/tools/otsdb-dump-restore/.gitignore b/tools/otsdb-dump-restore/.gitignore new file mode 100644 index 00000000000..d25b7153343 --- /dev/null +++ b/tools/otsdb-dump-restore/.gitignore @@ -0,0 +1,8 @@ +__pycache__/ +*.py[cod] + +.cache/ +.eggs/ +*.egg-info/ +build/ +dist/ diff --git a/tools/otsdb-dump-restore/Dockerfile.devel b/tools/otsdb-dump-restore/Dockerfile.devel new file mode 100644 index 00000000000..827cdb2f65e --- /dev/null +++ b/tools/otsdb-dump-restore/Dockerfile.devel @@ -0,0 +1,19 @@ +FROM python:3.10-slim + +ARG devel_user_name=root +ARG devel_uid=0 +ARG devel_gid=0 + +WORKDIR /kilda +COPY build-tools build-tools +RUN build-tools/setup-devel-user.sh "${devel_user_name}" "${devel_uid}" "${devel_gid}" + +USER ${devel_user_name} + +COPY requirements*.txt ./ +RUN pip install -r 
requirements.txt -r requirements-dev.txt + +RUN pip install flake8 + +COPY README.md setup.py build-tools ./ +COPY src src diff --git a/tools/otsdb-dump-restore/Dockerfile.prod b/tools/otsdb-dump-restore/Dockerfile.prod new file mode 100644 index 00000000000..fb4e5ab867b --- /dev/null +++ b/tools/otsdb-dump-restore/Dockerfile.prod @@ -0,0 +1,9 @@ +FROM python:3.10-slim + +ARG wheel + +WORKDIR /tmp +COPY dist/${wheel} ./ +RUN pip install ${wheel} + +WORKDIR / diff --git a/tools/otsdb-dump-restore/Makefile b/tools/otsdb-dump-restore/Makefile new file mode 100644 index 00000000000..4f83186f005 --- /dev/null +++ b/tools/otsdb-dump-restore/Makefile @@ -0,0 +1,62 @@ +#!/usr/bin/env make +# Makefile (root) + +# Targets description: +# prod - build a container image with the application, labels it with $(DOCKER_LABEL) +# devel - build container and starts an interactive shell inside it (app dir is attached inside the container as a +# volume, so all changes made to the application code are available inside the container without the need to +# restart/rebuild it). Useful for the application debug. + +APP_NAME := kilda-otsdb-dump-restore +WHEEL_NAME = $(subst -,_,$(APP_NAME)) +DOCKER_LABEL = $(APP_NAME) +ID_DIR := build +DIST_DIR := dist + +SRC_DIR := src +SRC_DIR_PY = $(shell find $(SRC_DIR) -type d \! -name __pycache__ -print) +SRC = requirements*.txt setup.py build-tools/* $(wildcard $(addsuffix /*.py,$(SRC_DIR_PY))) $(shell find $(SRC_DIR) -name APP_META\* -print) + +all: prod + +prod: $(ID_DIR)/prod.iid + docker image tag "$$(cat $<)" "$(DOCKER_LABEL)" + +dist/version: $(ID_DIR)/wheel.cid + docker cp "$$(cat $^):/kilda/dist/version" dist/version.tmp + docker cp "$$(cat $^):/kilda/dist/$(WHEEL_NAME)-$$(cat dist/version.tmp)-py3-none-any.whl" dist/ + mv dist/version.tmp $@ + +$(ID_DIR)/wheel.cid: $(ID_DIR)/wheel.iid | $(DIST_DIR) + if [ -f "$@" ]; then docker rm "$$(cat $@)" && rm "$@"; fi + docker run --cidfile=$@ "$$(cat $^)" /kilda/build-tools/make_wheel.sh + +$(ID_DIR)/devel.cid: $(ID_DIR)/devel.iid + if [ -f "$@" ]; then docker rm "$$(cat $@)" && rm "$@"; fi + docker create -ti \ + --volume="/etc/resolv.conf:/etc/resolv.conf" \ + --volume="$$PWD:/kilda" \ + --network=host \ + --cidfile=$@ "$$(cat $^)" /kilda/build-tools/devel-entry-point.sh + +$(ID_DIR)/devel.iid: Dockerfile.devel | $(ID_DIR) + docker build --iidfile=$@ --file=$< \ + --build-arg=devel_user_name=$$(id -un) \ + --build-arg=devel_uid=$$(id -u) \ + --build-arg=devel_gid=$$(id -g) \ + . + +$(ID_DIR)/wheel.iid: Dockerfile.devel $(SRC) | $(ID_DIR) + docker build --iidfile=$@ --file=$< . + +$(ID_DIR)/prod.iid: Dockerfile.prod dist/version | $(ID_DIR) + docker build --iidfile=$@ --file=$< \ + --build-arg=wheel="$(WHEEL_NAME)-$$(cat dist/version)-py3-none-any.whl" \ + . + +$(ID_DIR): + mkdir -p $@ +$(DIST_DIR): + mkdir -p $@ + +.PHONY: all prod diff --git a/tools/otsdb-dump-restore/README.md b/tools/otsdb-dump-restore/README.md new file mode 100644 index 00000000000..10ef1dd0264 --- /dev/null +++ b/tools/otsdb-dump-restore/README.md @@ -0,0 +1 @@ +# OpenKilda TSDB dump/restore tools diff --git a/tools/otsdb-dump-restore/build-tools/devel-entry-point.sh b/tools/otsdb-dump-restore/build-tools/devel-entry-point.sh new file mode 100755 index 00000000000..8f87a105003 --- /dev/null +++ b/tools/otsdb-dump-restore/build-tools/devel-entry-point.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +cd -- "$(dirname -- "$0")/.." 
|| exit 1 + +python setup.py develop + +pip install pydevd-pycharm~=203.7148.57 + +echo +echo Use \"kilda-otsdb-dump\" or \"kilda-otsdb-restore\" to run application +echo + +exec bash -l diff --git a/tools/otsdb-dump-restore/build-tools/make_wheel.sh b/tools/otsdb-dump-restore/build-tools/make_wheel.sh new file mode 100755 index 00000000000..1a1dfa3fa85 --- /dev/null +++ b/tools/otsdb-dump-restore/build-tools/make_wheel.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -eux +cd -- "$(dirname -- "$0")/.." || exit 1 + +python setup.py develop + +flake8 src + +python setup.py bdist_wheel +python setup.py --version > dist/version diff --git a/tools/otsdb-dump-restore/build-tools/setup-devel-user.sh b/tools/otsdb-dump-restore/build-tools/setup-devel-user.sh new file mode 100755 index 00000000000..537ff0d2b95 --- /dev/null +++ b/tools/otsdb-dump-restore/build-tools/setup-devel-user.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +set -eux + +echo "$@" + +DEVEL_NAME="${1}" +DEVEL_UID="${2}" +DEVEL_GID="${3}" + +if [ ${DEVEL_UID} -eq 0 ]; then + # nothing to do for root user + exit +fi + +if [ -z "$(getent group "${DEVEL_NAME}")" ]; then + groupadd -g "${DEVEL_GID}" "${DEVEL_NAME}" +fi +if [ -z "$(getent passwd "${DEVEL_NAME}")" ]; then + useradd -m -u "${DEVEL_UID}" -g "${DEVEL_GID}" -s /bin/bash "${DEVEL_NAME}" +fi + +chown "${DEVEL_UID}:${DEVEL_GID}" \ + /kilda \ + /usr/local \ + /usr/local/bin \ + /usr/local/lib/python3.10/site-packages diff --git a/tools/otsdb-dump-restore/requirements-dev.txt b/tools/otsdb-dump-restore/requirements-dev.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tools/otsdb-dump-restore/requirements.txt b/tools/otsdb-dump-restore/requirements.txt new file mode 100644 index 00000000000..125d8b644c4 --- /dev/null +++ b/tools/otsdb-dump-restore/requirements.txt @@ -0,0 +1,3 @@ +click==7.1.2 +requests==2.28.1 +ndjson==0.3.1 diff --git a/tools/otsdb-dump-restore/run_devel.sh b/tools/otsdb-dump-restore/run_devel.sh new file mode 100755 index 00000000000..8d9d5dd9656 --- /dev/null +++ b/tools/otsdb-dump-restore/run_devel.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +cd -- "$(dirname -- "$0")" || exit 1 + +cid=build/devel.cid +make "${cid}" \ + && exec docker start -ai "$(cat ${cid})" diff --git a/tools/otsdb-dump-restore/setup.py b/tools/otsdb-dump-restore/setup.py new file mode 100644 index 00000000000..92aa6d0281a --- /dev/null +++ b/tools/otsdb-dump-restore/setup.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python + +import functools +import json +import os + +from setuptools import find_packages +from setuptools import setup + +root = os.path.dirname(__file__) +root = os.path.abspath(root) +root = os.path.normpath(root) + +path = functools.partial(os.path.join, root) + +setup_deps = ['wheel'] +with open(path('requirements.txt')) as f: + run_deps = f.readlines() +with open(path('requirements-dev.txt')) as f: + test_deps = f.readlines() + +with open(path('README.md'), 'rt') as f: + README = f.read() + +with open(path('src', 'kilda', 'tsdb_dump_restore', 'APP_META.json'), 'rt') as f: + meta = json.load(f) + +setup( + name='kilda-otsdb-dump-restore', + version=meta['version'], + description='Provide tools to dump and restore OpenTSDB data', + long_description=README, + long_description_content_type='text/markdown', + packages=find_packages('src'), + package_dir={'': 'src'}, + package_data={'': ['*.json']}, + setup_requires=setup_deps, + tests_require=test_deps, + install_requires=run_deps, + entry_points={ + 'console_scripts': [ + 'kilda-otsdb-dump = kilda.tsdb_dump_restore.dump:main', + 
'kilda-otsdb-restore = kilda.tsdb_dump_restore.restore:main']}, + classifiers=[ + 'Programming Language :: Python :: 3' + ], +) diff --git a/tools/otsdb-dump-restore/src/kilda/__init__.py b/tools/otsdb-dump-restore/src/kilda/__init__.py new file mode 100644 index 00000000000..de40ea7ca05 --- /dev/null +++ b/tools/otsdb-dump-restore/src/kilda/__init__.py @@ -0,0 +1 @@ +__import__('pkg_resources').declare_namespace(__name__) diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/APP_META.json b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/APP_META.json new file mode 100644 index 00000000000..fb1e02e74dc --- /dev/null +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/APP_META.json @@ -0,0 +1 @@ +{"version": "0.1.dev3"} diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/__init__.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py new file mode 100644 index 00000000000..f46949243bb --- /dev/null +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py @@ -0,0 +1,346 @@ +# dump.py + +import datetime +import pathlib + +import click +import ndjson +import requests + +from kilda.tsdb_dump_restore import mapping +from kilda.tsdb_dump_restore import stats_client +from kilda.tsdb_dump_restore import report +from kilda.tsdb_dump_restore import utils + +ZERO_TIMEDELTA = datetime.timedelta() + + +@click.command() +@click.option( + '--time-stop', type=click.types.DateTime(), metavar='TIME_STOP', + help='timestamp where to stop dumping (by default NOW)') +@click.option( + '--dump-dir', type=click.types.Path(file_okay=False), default='.', + help='location where dump files will be stored') +@click.option( + '--query-frame-size-seconds', type=int, default=180, + help='OpenTSDB query time frame size') +@click.option( + '--metrics-prefix', default='kilda.', + help='only metrics that match this prefix will be dumped') +@click.option( + '--remove-metadata', is_flag=True) +@click.argument('opentsdb_endpoint') +@click.argument( + 'time_start', type=click.types.DateTime(), metavar='TIME_START') +def main(opentsdb_endpoint, time_start, **options): + time_start = time_start.astimezone(datetime.timezone.utc) + + time_stop = options['time_stop'] + if not time_stop: + time_stop = utils.time_now() + else: + time_stop = time_stop.astimezone(datetime.timezone.utc) + + dump_dir = pathlib.Path(options['dump_dir']) + query_frame_size = datetime.timedelta( + seconds=options['query_frame_size_seconds']) + prefix = options['metrics_prefix'] + need_remove_meta = options['remove_metadata'] + + dump_dir.mkdir(exist_ok=True, parents=True) + dump_frame = _TimeFrame(time_start, time_stop) + + rest_statistics = utils.RestStatistics() + statistics = _DumpStatistics(rest_statistics) + + http_session = requests.Session() + http_session.hooks['response'].append( + utils.ResponseStatisticsHook(rest_statistics)) + + client = stats_client.OpenTSDBStatsClient(http_session, opentsdb_endpoint) + + all_metrics_iterator = stats_client.OpenTSDBMetricsList( + http_session, opentsdb_endpoint, prefix=prefix) + for metric in all_metrics_iterator: + dump( + statistics, client, dump_frame, dump_dir, metric, query_frame_size, + need_remove_meta=need_remove_meta) + + +def dump( + statistics, client, dump_frame, dump_location, metric_name, + query_frame_size, need_remove_meta): + meta = 
_DumpMetadata(metric_name, dump_location) + + try: + last_frame = meta.read() + start = last_frame.start + except ValueError: + start = utils.datetime_align(dump_frame.start, query_frame_size) + + end = dump_frame.end + + query_manager = _AggDataQueryManager() + statistics.evaluate_expected_iterations_count(start, end, query_frame_size) + + stream = build_time_stream(start, end, query_frame_size) + stream = query_data_stream(stream, query_manager) + stream = stats_stream(stream, client, metric_name, query_manager) + + dump_file = dump_location / (metric_name + '.ndjson') + with dump_file.open('at') as target: + if 0 < target.tell(): + # extending existing file, make sure we have line separator before + # new record + target.write('\n') + with DumpProgressReport(metric_name, statistics) as status_report: + _dump_stream(stream, target, meta, status_report, statistics) + + if need_remove_meta: + meta.remove() + + +def _dump_stream(stream, target, meta, status_report, statistics): + writer = ndjson.writer(target) + for frame, stats_entries in stream: + status_report.flush() + for entry in stats_entries: + statistics.add_entry(frame, entry) + writer.writerow(mapping.encode_stats_entry(entry)) + + meta.write(frame) + + status_report.flush() + + +def build_time_stream(start, end, step): + factory = _FrameFactory() + + stream = time_stream(start, step) + stream = finite_time_stream(stream, end) + stream = frame_stream(stream) + stream = frame_overlap_fix_stream( + stream, end_offset=datetime.timedelta(seconds=-1)) + + for frame_start, frame_end in stream: + yield factory.produce(frame_start, frame_end) + + +def time_stream(start, step): + now = start + while True: + yield now + now += step + + +def finite_time_stream(stream, end): + for now in stream: + if end < now: + break + yield now + + +def frame_stream(stream): + start = None + for end in stream: + if start is not None: + yield start, end + start = end + + +def frame_overlap_fix_stream( + stream, start_offset=ZERO_TIMEDELTA, end_offset=ZERO_TIMEDELTA): + for start, end in stream: + yield start + start_offset, end + end_offset + + +def query_data_stream(stream, query_manager): + while True: + for entry in query_manager.flush(): + yield entry + for frame in stream: + yield _QueryData(frame) + break + else: + break + + +def stats_stream(stream, client, metric, query_manager): + for query_data in stream: + tags = {x: '*' for x in query_data.agg_tags} + stats_data = client.query_range( + query_data.frame.start, query_data.frame.end, + metric, tags=tags) + + try: + batches = stats_data.lookup(metric) + except ValueError: + batches = [] + + for entry in batches: + if entry.aggregate_tags: + query_manager.schedule( + _extract_stats_batch_time_frame(entry, query_data.frame), + entry.aggregate_tags) + continue + + yield query_data.frame, stats_client.batch_to_entries(entry) + + +def _extract_stats_batch_time_frame(batch, fallback): + if not batch.values: + return fallback + start = batch.values[0].timestamp + end = batch.values[-1].timestamp + if start == end: + end = start + datetime.timedelta(seconds=1) + + return _TimeFrame( + start, end, step_number=fallback.step_number) + + +class _DumpStatistics: + def __init__(self, rest_statistics): + self.rest = rest_statistics + self.expected_iterations_count = 0 + self.entries_count = 0 + self.current_frame = None + + self._average_frame_time = _SlidingAverage(init=ZERO_TIMEDELTA) + self._average_frame_entries_count = _SlidingAverage() + self._average_frame_entries = 0 + + def 
evaluate_expected_iterations_count(self, start, end, step): + duration = end - start + iterations = duration // step + if duration % step: + iterations += 1 + + self.expected_iterations_count = iterations + + def add_entry(self, frame, entry): + self.entries_count += 1 + + if self.current_frame is None: + self.current_frame = _DumpFrameStatistics(frame) + + if self.current_frame.update(frame, entry): + return + + next_frame = _DumpFrameStatistics(frame) + next_frame.update(frame, entry) + + delta = next_frame.seen_time - self.current_frame.seen_time + + self._average_frame_time.count(delta) + self._average_frame_entries_count.count( + self.current_frame.entries_count) + + self.current_frame = next_frame + + +class _DumpFrameStatistics: + def __init__(self, frame): + self.frame = frame + self.seen_time = utils.time_now() + + self.entries_count = 0 + + def update(self, frame, entry): + if self.frame != frame: + return False + self.entries_count += 1 + return True + + +class _DumpMetadata(utils.OperationMetadata): + def __init__(self, metric_name, dump_location): + super().__init__( + dump_location / (metric_name + '.dmeta.json'), + dump_location / (metric_name + '.dmeta.json.upd')) + + def _encode(self, entry): + return { + 'start': utils.datetime_to_unixtime(entry.start), + 'end': utils.datetime_to_unixtime(entry.end)} + + def _decode(self, data): + start = utils.unixtime_to_datetime(data['start']) + end = utils.unixtime_to_datetime(data['end']) + return _TimeFrame(start, end) + + +class DumpProgressReport(report.ProgressReportBase): + def __init__(self, metric_name, statistics, **kwargs): + super().__init__(**kwargs) + self._metric_name = metric_name + self.statistics = statistics + + def _format_message(self): + message = ['Dumping "', self._metric_name, '"'] + frame_statistics = self.statistics.current_frame + if frame_statistics is not None: + message.extend(( + ' at ', frame_statistics.frame.start, + ' #', frame_statistics.frame.step_number)) + message.extend(( + ' ', self.statistics.entries_count, ' entries dumped total ', + ' ', self.statistics.rest.requests_count, ' http requests')) + return message + + +class _SlidingAverage: + def __init__(self, entries=0, init=0): + self.entries = entries + self.total = init + + def count(self, value): + self.total += value + self.entries += 1 + + def get_average(self): + if not self.entries: + return self.total + return self.total / self.entries + + +class _AggDataQueryManager: + def __init__(self): + self._queue = [] + + def schedule(self, frame, agg_tags): + self._queue.append(_QueryData(frame, agg_tags)) + + def flush(self): + queue, self._queue = self._queue, [] + return queue + + +class _QueryData: + __slots__ = ('frame', 'agg_tags') + + def __init__(self, frame, agg_tags=None): + if not agg_tags: + agg_tags = set() + self.frame = frame + self.agg_tags = agg_tags + + +class _TimeFrame: + __slots__ = ('start', 'end', 'step_number') + + def __init__(self, start, end, step_number=0): + self.start = start + self.end = end + self.step_number = step_number + + +class _FrameFactory: + def __init__(self, step_number=0): + self.step_now = step_number + + def produce(self, start, end): + frame = _TimeFrame(start, end, self.step_now) + self.step_now += 1 + return frame diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/mapping.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/mapping.py new file mode 100644 index 00000000000..45e0c5746a1 --- /dev/null +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/mapping.py @@ -0,0 
+1,42 @@ +# mapping.py + +from kilda.tsdb_dump_restore import stats_client +from kilda.tsdb_dump_restore import utils + + +def encode_stats_entry(entry): + result = {} + _timestamp_adapter.write( + result, utils.datetime_to_unixtime(entry.timestamp)) + _metric_name_adapter.write(result, entry.name) + _tags_adapter.write(result, dict(entry.tags)) + _value_adapter.write(result, entry.value) + + return result + + +def decode_stats_entry(raw): + timestamp = _timestamp_adapter.read(raw) + name = _metric_name_adapter.read(raw) + tags = _tags_adapter.read(raw) + value = _value_adapter.read(raw) + + return stats_client.StatsEntry( + utils.unixtime_to_datetime(timestamp), name, value, tags=tags) + + +class Adapter: + def __init__(self, field): + self._field = field + + def write(self, target, value): + target[self._field] = value + + def read(self, target): + return target[self._field] + + +_timestamp_adapter = Adapter('timestamp') +_metric_name_adapter = Adapter('metric') +_tags_adapter = Adapter('tags') +_value_adapter = Adapter('value') diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/report.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/report.py new file mode 100644 index 00000000000..e310f48124e --- /dev/null +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/report.py @@ -0,0 +1,51 @@ +# report.py + +import sys + + +class ProgressReportBase: + _last_len = 0 + + def __init__(self, stream=None): + if stream is None: + stream = sys.stdout + self.stream = stream + + self._nested_level = 0 + + def __enter__(self): + self._nested_level += 1 + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self._nested_level -= 1 + if 0 < self._nested_level: + return + + self.close() + + def flush(self): + chunks = self._format_message() + chunks = (str(x) for x in chunks if x is not None) + chunks = (str(x) for x in chunks if x) + message = ''.join(chunks) + if not message: + return + actual_len = len(message) + message += ' ' * max(self._last_len - len(message), 0) + try: + if self._last_len: + self.stream.write('\r') + self.stream.write(message) + self.stream.flush() + finally: + self._last_len = actual_len + + def close(self): + self.flush() + if 0 < self._last_len: + self.stream.write('\n') + self.stream.flush() + + def _format_message(self): + raise NotImplementedError() diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/restore.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/restore.py new file mode 100644 index 00000000000..bcaca546f37 --- /dev/null +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/restore.py @@ -0,0 +1,284 @@ +# restore.py + +import os +import pathlib +import re +import json + +import click +import requests + +from kilda.tsdb_dump_restore import mapping +from kilda.tsdb_dump_restore import report +from kilda.tsdb_dump_restore import utils + + +@click.command() +@click.option( + '--dump-dir', type=click.types.Path(file_okay=False), default='.', + help='Location where dump files are stored') +@click.option( + '--put-request-size-limit', type=int, default=4096, + help='Limit for "put" request payload size (bytes)') +@click.argument('opentsdb_endpoint') +def main(opentsdb_endpoint, **options): + dump_dir = pathlib.Path(options['dump_dir']) + batch_size_limit = options['put_request_size_limit'] + + http_session = requests.Session() + + rest_statistics = utils.RestStatistics() + + http_session.hooks['response'].append( + utils.ResponseStatisticsHook(rest_statistics)) + + stream = stream_source(dump_dir) + 
stream = records_stream_map(stream) + stream = decode_stream_map(stream) + stream = decode_otsdb_stream_map(stream) + stream = collect_batches_stream_map( + stream, batch_size_limit, + lambda: _Batch(prefix=b'[', suffix=b']', separator=b',')) + stream = push_batch_to_otsdb_stream_map( + stream, http_session, opentsdb_endpoint) + + metric_report = None + try: + for batch, descriptors in stream: + for d in descriptors: + metadata = d.stream_entry.metadata + metadata.write(d.offset_end) + + if metric_report is None: + metric_report = RestoreProgressReport(rest_statistics, d) + if metric_report.metric == d.name: + metric_report.update(d) + else: + metric_report.close() + metric_report = RestoreProgressReport(rest_statistics, d) + metric_report.flush() + finally: + if metric_report is not None: + metric_report.close() + + +def stream_source(path): + patters = ( + re.compile(r'^(?P.*)\.ndjson$', re.IGNORECASE),) + + for entry in path.iterdir(): + for p in patters: + m = p.match(entry.name) + if m is None: + continue + name = m.group('name') + yield _StreamEntry(name, entry) + + +def records_stream_map(stream, chunk_size=4096, separator=b'\n'): + for stream_entry in stream: + try: + offset = stream_entry.metadata.read() + except ValueError: + offset = 0 + + with stream_entry.path.open('rb') as fd_stream: + fd_stream.seek(0, os.SEEK_END) + total_size = fd_stream.tell() + fd_stream.seek(offset, os.SEEK_SET) + + chunk = b'' + while True: + data = fd_stream.read(chunk_size) + if not data: + break + + chunk += data + while True: + entry, match, tail = chunk.partition(separator) + if not match: + break + + size = len(entry) + len(match) + yield stream_entry, entry, offset, size, total_size + chunk = tail + offset += size + + if chunk: + size = len(chunk) + len(separator) + yield stream_entry, chunk, offset, size, total_size + + +def decode_stream_map(stream): + for stream_entry, record, offset, size, total_size in stream: + if not record: + continue + data = json.loads(record) + entry = mapping.decode_stats_entry(data) + yield stream_entry, entry, offset, size, total_size + + +def decode_otsdb_stream_map(stream): + for stream_entry, record, offset, size, total_size in stream: + entry = _stats_to_otsdb_json_map(record) + entry = entry.encode('utf-8') + yield stream_entry, entry, offset, size, total_size + + +def _stats_to_otsdb_json_map(stats_entry): + unixtime = utils.datetime_to_unixtime(stats_entry.timestamp) + timestamp = utils.unixtime_to_millis(unixtime) + return json.dumps({ + "metric": stats_entry.name, + "timestamp": timestamp, + "value": stats_entry.value, + "tags": stats_entry.tags}) + + +def collect_batches_stream_map(stream, size_limit, batch_factory): + stream_descriptors = [] + batch = batch_factory() + for stream_entry, record, offset, size, total_size in stream: + # print('A: {} {} {}'.format(name, offset, size)) + batch.add(record) + + if not stream_descriptors: + stream_descriptors = [_StreamChunkDescriptor(stream_entry)] + if stream_descriptors[-1].name != stream_entry.name: + stream_descriptors.append(_StreamChunkDescriptor(stream_entry)) + stream_descriptors[-1].update(offset, size, total_size) + + if batch.size() < size_limit: + continue + + yield batch, stream_descriptors + + batch = batch_factory() + stream_descriptors = [] + + if not batch.is_empty(): + yield batch, stream_descriptors + + +def push_batch_to_otsdb_stream_map(stream, http_session, endpoint): + url = utils.HttpUrlFactory(endpoint).produce('api', 'put') + for batch, descriptors in stream: + payload = 
batch.assemble() + response = http_session.post( + url, payload, headers={ + 'content-type': 'application/json' + }) + + response.raise_for_status() + yield batch, descriptors + + +class _StreamEntry: + __slots__ = ('name', 'path', 'metadata') + + def __init__(self, name, path): + self.name = name + self.path = path + self.metadata = _RestoreMetadata(path) + + +class _StreamChunkDescriptor: + def __init__(self, stream_entry): + self.stream_entry = stream_entry + self.name = stream_entry.name + + self.offset_start = self.offset_end = -1 + self.stream_size = 0 + self.entries_count = 0 + + def update(self, offset, size, total_size): + if self.offset_start < 0: + # print('C0: {} {}'.format(self.name, offset)) + self.offset_start = offset + self.offset_end = offset + size + else: + # print('C1: {} {}'.format(self.name, offset)) + self.offset_start = min(self.offset_start, offset) + self.offset_end = max(self.offset_end, offset + size) + self.stream_size = total_size + self.entries_count += 1 + # print('CE: {} {}'.format(self.name, self.offset_start)) + + +class _Batch: + def __init__(self, separator='', prefix='', suffix=''): + self._separator = separator + self._prefix = prefix + self._suffix = suffix + + self._size = len(prefix) + len(suffix) + self._entries = [] + + def add(self, entry): + size_diff = len(entry) + if 0 < len(self._entries): + size_diff += len(self._separator) + + self._entries.append(entry) + self._size += size_diff + + def assemble(self): + return (self._prefix + + self._separator.join(self._entries) + + self._suffix) + + def size(self): + return self._size + + def is_empty(self): + return not self._entries + + +class _RestoreMetadata(utils.OperationMetadata): + def __init__(self, path): + super().__init__( + path.parent / (path.name + '.rmeta.json'), + path.parent / (path.name + '.rmeta.json.upd')) + + def _encode(self, entry): + return {'offset': entry} + + def _decode(self, data): + return data['offset'] + + +class RestoreProgressReport(report.ProgressReportBase): + def __init__(self, rest_statistics, stream_descriptor, **kwargs): + super().__init__(**kwargs) + self.rest_statistics = rest_statistics + self.metric = stream_descriptor.name + self._entries_count = stream_descriptor.entries_count + self._stream_descriptor = stream_descriptor + + def update(self, stream_descriptor): + self._stream_descriptor = stream_descriptor + self._entries_count += stream_descriptor.entries_count + + def _format_message(self): + descriptor = self._stream_descriptor + one_percent = descriptor.stream_size / 100 + if 0 < one_percent: + percent = descriptor.offset_end / one_percent + else: + percent = None + if (descriptor.stream_size + and descriptor.stream_size <= descriptor.offset_end): + percent = 100 + + if percent is None: + percent = '???' 
+ else: + percent = '{:.2f}%'.format(percent) + + message = ( + 'Restoring "{}" {} entries processed ' + '({} completed, {} http requests)' + ).format( + self.metric, self._entries_count, percent, + self.rest_statistics.requests_count) + return [message] diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/stats_client.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/stats_client.py new file mode 100644 index 00000000000..60b577aeb8a --- /dev/null +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/stats_client.py @@ -0,0 +1,290 @@ +# stats_tsdb.py + +import collections +from collections import abc +import datetime +import string + +from kilda.tsdb_dump_restore import utils + + +class _StatsClientBase: + def __init__(self, http_session, endpoint): + self._http_session = http_session + self._url_factory = utils.HttpUrlFactory(endpoint) + + +class VictoriaMetricsStatsClient(_StatsClientBase): + def query_range(self, start, end, metric_name, tags=None, is_rate=False): + if not tags: + tags = dict() + response = self._http_session.get( + self._url_factory.produce( + 'api', 'v1', 'query_range', + start=self._format_time_query_arg(start), + end=self._format_time_query_arg(end), + query=self._build_query(metric_name, tags, is_rate))) + response.raise_for_status() + wrapper = self._unpack_response(response.json()) + return self._parse_matrix_response(wrapper, metric_name) + + @classmethod + def _build_query(cls, metric_name, tags, is_rate): + query = [ + cls._escape(metric_name), + '{', + ','.join('{}="{}"'.format( + cls._escape(name), value) + for (name, value) in tags.items()), + '}'] + query = ''.join(query) + if is_rate: + query = 'rate({})'.format(query) + return query + + @staticmethod + def _escape(origin): + return origin.replace('-', r'\-') + + @staticmethod + def _unpack_response(response): + try: + status = response['status'] + except KeyError as e: + raise ValueError( + 'Invalid response format - ' + 'root object have no key "{}"'.format(e)) + if status != 'success': + raise ValueError('Victoriametrics query have failed') + return response['data'] + + @classmethod + def _parse_matrix_response(cls, wrapper, metric_name_fallback): + result_type = wrapper.get('resultType') + if result_type != 'matrix': + raise ValueError( + 'Unexpected API result type "{}"'.format(result_type)) + + by_name = collections.defaultdict(list) + for raw_entry in wrapper['result']: + metric = raw_entry['metric'] + tags = dict(metric) + name = tags.pop('__name__', metric_name_fallback) + values = tuple( + cls._decode_matrix_value(x) + for x in raw_entry['values']) + + by_name[name].append(_StatsEntriesBatch( + name, tags, values, [])) + + return _QueryResult(dict(by_name)) + + @staticmethod + def _decode_matrix_value(raw): + timestamp, value = raw + timestamp = datetime.datetime.fromtimestamp( + timestamp, datetime.timezone.utc) + + return _StatsEntryValue(timestamp, _decode_numeric_value(value)) + + @staticmethod + def _format_time_query_arg(value): + value_utc = value.astimezone(datetime.timezone.utc) + return value_utc.isoformat() + + +class OpenTSDBStatsClient(_StatsClientBase): + def query_range(self, start, end, metric_name, tags=None, is_rate=False): + if not tags: + tags = dict() + + agg_func = 'max' + if is_rate: + agg_func = 'rate' + + response = self._http_session.get( + self._url_factory.produce( + 'api', 'query', + start=self._format_time_query_arg(start), + end=self._format_time_query_arg(end), + m=self._build_query(metric_name, tags, agg_func))) + 
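+        # Note: the "m" argument assembled above follows OpenTSDB's
+        # "aggregator:metric{tag=value,...}" query syntax, e.g.
+        # "max:some.metric{tag=*}" (metric and tag names here are
+        # illustrative only); see _build_query below for how the
+        # aggregation function, metric name and tag filter are joined.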
response.raise_for_status() + return self._parse_query_response(response.json()) + + @classmethod + def _build_query(cls, metric_name, tags, agg_func): + query = [metric_name] + if tags: + query.extend(( + '{', + ','.join('{}={}'.format(name, value) + for (name, value) in tags.items()), + '}')) + query = ''.join(query) + handlers = [agg_func, query] + return ':'.join(handlers) + + @staticmethod + def _format_time_query_arg(value): + return int(utils.datetime_to_unixtime(value)) + + @classmethod + def _parse_query_response(cls, payload): + by_name = collections.defaultdict(list) + for entry in payload: + name = entry['metric'] + tags = entry['tags'] + agg_tags = entry.get('aggregateTags', []) + values = cls._parse_dps(entry['dps']) + by_name[name].append( + _StatsEntriesBatch(name, tags, values, agg_tags)) + return _QueryResult(dict(by_name)) + + @classmethod + def _parse_dps(cls, payload): + if isinstance(payload, abc.Mapping): + return sorted( + cls._parse_dps_stream(payload.items()), + key=lambda x: x.timestamp) + return cls._parse_dps_stream(payload) + + @staticmethod + def _parse_dps_stream(stream): + results = [] + for timestamp, value in stream: + timestamp = int(timestamp) + timestamp = datetime.datetime.fromtimestamp( + timestamp, datetime.timezone.utc) + value = _decode_numeric_value(value) + results.append(_StatsEntryValue(timestamp, value)) + return results + + +class OpenTSDBMetricsList(abc.Iterator): + _allowed_chars = string.ascii_letters + string.digits + '-_./' + + def __init__(self, http_session, endpoint, prefix='', batch_size=25): + self._http_session = http_session + self._url_factory = utils.HttpUrlFactory(endpoint) + self._batch_size = batch_size + + self._known_metrics = set() + self._ready = iter([]) + self._queue = [iter([prefix])] + + self._api_requests_count = 0 + + def __next__(self): + try: + return next(self._ready) + except StopIteration: + pass + + while self._queue: + for prefix in self._queue[0]: + suggests = self._query_suggest(prefix, self._batch_size) + if self._batch_size <= len(suggests): + self._queue.append(self._new_queue_entry(prefix)) + new_entries = set(suggests) - self._known_metrics + if new_entries: + break + else: + self._queue.pop(0) + continue + + self._known_metrics.update(new_entries) + self._ready = iter(new_entries) + break + + return next(self._ready) + + def _query_suggest(self, prefix, limit): + args = {'type': 'metrics', 'q': prefix, 'max': limit} + url = self._url_factory.produce('api', 'suggest', **args) + self._api_requests_count += 1 + response = self._http_session.get(url) + response.raise_for_status() + return set(response.json()) + + @classmethod + def _new_queue_entry(cls, prefix): + return ((prefix + x) for x in cls._allowed_chars) + + +def batch_to_entries(batch): + return [ + StatsEntry( + x.timestamp, batch.name, x.value, batch.tags) + for x in batch.values] + + +class StatsEntry: + __slots__ = ('timestamp', 'name', 'tags', 'value') + + def __init__(self, timestamp, name, value, tags=None): + self.timestamp = timestamp + self.name = name + self.value = value + if not tags: + tags = {} + self.tags = tags + + def __str__(self): + return '{}({}, {}, {}, tags={})'.format( + StatsEntry.__name__, self.timestamp, self.name, + self.value, self.tags) + + +class _QueryResult: + def __init__(self, by_name): + self._by_name = by_name + + def lookup(self, name): + try: + entry = self._by_name[name] + except KeyError: + raise ValueError( + 'There is no metric {!r} in stats results'.format(name)) + return entry + + def 
lookup_values(self, name): + results = [] + for entry in self.lookup(name): + results.extend(entry.values) + return results + + def lookup_entries(self, name): + results = [] + for batch in self.lookup(name): + results.extend(batch_to_entries(batch)) + return results + + def get_metrics_list(self): + return list(self._by_name) + + +class _StatsEntriesBatch: + __slots__ = ('name', 'tags', 'aggregate_tags', 'values') + + def __init__(self, name, tags, values, aggregate_tags): + self.name = name + self.tags = tags + self.values = values + self.aggregate_tags = aggregate_tags + + +class _StatsEntryValue: + __slots__ = ('timestamp', 'value') + + def __init__(self, timestamp, value): + self.timestamp = timestamp + self.value = value + + +def _decode_numeric_value(origin): + value_int = int(origin) + value_float = float(origin) + + if value_int == value_float: + return value_int + return value_float diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/utils.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/utils.py new file mode 100644 index 00000000000..6502b5424af --- /dev/null +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/utils.py @@ -0,0 +1,121 @@ +# utils.py + +from collections import abc +import datetime +import json +from urllib import parse + +UNIXTIME_ORIGIN = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc) + + +def time_now(): + now = datetime.datetime.now() + return now.astimezone(datetime.timezone.utc) + + +def datetime_to_unixtime(value): + return (value.astimezone(datetime.timezone.utc) - + UNIXTIME_ORIGIN).total_seconds() + + +def datetime_align(origin, align): + origin_delta = origin - UNIXTIME_ORIGIN + r = origin_delta % align + return origin - r + + +def unixtime_to_millis(value): + return int(value * 1000) + + +def unixtime_to_datetime(value): + return (datetime.datetime.utcfromtimestamp(value) + .replace(tzinfo=datetime.timezone.utc)) + + +class HttpUrlFactory: + def __init__(self, base): + endpoint = parse.urlparse(base) + update = {} + if endpoint.path: + update['path'] = self._force_path_encoding(endpoint.path) + if update: + endpoint = endpoint._replace(**update) + self._endpoint = endpoint + + def produce(self, *path, **query_args): + update = {} + if path: + path = '/'.join( + parse.quote(x) for x in path) + update['path'] = '/'.join((self._endpoint.path, path)) + + if query_args: + qs = dict(parse.parse_qs(self._endpoint.query)) + qs.update(query_args) + query = [] + for name, value in qs.items(): + if (isinstance(value, str) + or not isinstance(value, abc.Sequence)): + query.append((name, value)) + else: + for entry in value: + query.append((name, entry)) + update['query'] = parse.urlencode(query, doseq=True) + + return self._endpoint._replace(**update).geturl() + + @staticmethod + def _force_path_encoding(path): + decoded = parse.unquote(path) + return parse.quote(decoded) + + +class RestStatistics: + def __init__(self): + self.requests_count = 0 + + def record_response(self, response): + self.requests_count += 1 + + +class ResponseStatisticsHook: + def __init__(self, statistics): + self._statistics = statistics + + def __call__(self, response, **kwargs): + self._statistics.record_response(response) + + +class OperationMetadata: + def __init__(self, path, path_upd): + self._path = path + self._path_upd = path_upd + + def read(self): + try: + raw = self._read() + except FileNotFoundError as e: + raise ValueError('Metadata not available', e) + return self._decode(raw) + + def write(self, entry): + with 
self._path_upd.open('wt') as stream: + json.dump(self._encode(entry), stream) + self._path_upd.replace(str(self._path)) + + def remove(self): + try: + self._path.unlink() + except FileNotFoundError: + pass + + def _read(self): + with self._path.open('rt') as stream: + return json.load(stream) + + def _encode(self, entry): + raise NotImplementedError + + def _decode(self, data): + raise NotImplementedError From 8afe463544ce67e08b2558fc9ee5fe688d02ff4c Mon Sep 17 00:00:00 2001 From: Pablo Murillo Date: Thu, 23 Mar 2023 09:46:35 +0100 Subject: [PATCH 02/27] format openTSDB migration tool code * improve help command * extend limit of lines to 129 * remove unused files from prod image --- tools/otsdb-dump-restore/Dockerfile.prod | 3 +- .../build-tools/make_wheel.sh | 2 +- .../src/kilda/tsdb_dump_restore/dump.py | 47 +++++++++---------- .../src/kilda/tsdb_dump_restore/restore.py | 23 +++++---- 4 files changed, 39 insertions(+), 36 deletions(-) diff --git a/tools/otsdb-dump-restore/Dockerfile.prod b/tools/otsdb-dump-restore/Dockerfile.prod index fb4e5ab867b..be8abcde381 100644 --- a/tools/otsdb-dump-restore/Dockerfile.prod +++ b/tools/otsdb-dump-restore/Dockerfile.prod @@ -5,5 +5,4 @@ ARG wheel WORKDIR /tmp COPY dist/${wheel} ./ RUN pip install ${wheel} - -WORKDIR / +RUN rm -rf ${wheel} diff --git a/tools/otsdb-dump-restore/build-tools/make_wheel.sh b/tools/otsdb-dump-restore/build-tools/make_wheel.sh index 1a1dfa3fa85..9ec7d31c776 100755 --- a/tools/otsdb-dump-restore/build-tools/make_wheel.sh +++ b/tools/otsdb-dump-restore/build-tools/make_wheel.sh @@ -5,7 +5,7 @@ cd -- "$(dirname -- "$0")/.." || exit 1 python setup.py develop -flake8 src +flake8 --max-line-length=119 src python setup.py bdist_wheel python setup.py --version > dist/version diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py index f46949243bb..8a912c0a43a 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py @@ -18,33 +18,38 @@ @click.command() @click.option( '--time-stop', type=click.types.DateTime(), metavar='TIME_STOP', - help='timestamp where to stop dumping (by default NOW)') + default=str(datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")), + help='Timestamp where to stop dumping [default: NOW]') @click.option( '--dump-dir', type=click.types.Path(file_okay=False), default='.', - help='location where dump files will be stored') + help='Location where dump files will be stored') @click.option( - '--query-frame-size-seconds', type=int, default=180, - help='OpenTSDB query time frame size') + '--query-frame-size', type=int, default=180, show_default=True, + help='OpenTSDB query time frame size (seconds)') @click.option( - '--metrics-prefix', default='kilda.', - help='only metrics that match this prefix will be dumped') + '--metrics-prefix', default='kilda.', show_default=True, + help='Only metrics that match this prefix will be dumped') @click.option( '--remove-metadata', is_flag=True) @click.argument('opentsdb_endpoint') @click.argument( 'time_start', type=click.types.DateTime(), metavar='TIME_START') def main(opentsdb_endpoint, time_start, **options): - time_start = time_start.astimezone(datetime.timezone.utc) + """ + This tool dumps the data from an OpenTSDB + + OPENTSDB_ENDPOINT openTSDB endpoint - time_stop = options['time_stop'] - if not time_stop: - time_stop = utils.time_now() - else: - time_stop = time_stop.astimezone(datetime.timezone.utc) + 
TIME_START time since the data is dumped + + Example: + kilda-otsdb-dump http://example.com:4242 2023-03-08 + """ + time_start = time_start.astimezone(datetime.timezone.utc) + time_stop = options['time_stop'].astimezone(datetime.timezone.utc) dump_dir = pathlib.Path(options['dump_dir']) - query_frame_size = datetime.timedelta( - seconds=options['query_frame_size_seconds']) + query_frame_size = datetime.timedelta(seconds=options['query_frame_size']) prefix = options['metrics_prefix'] need_remove_meta = options['remove_metadata'] @@ -55,22 +60,16 @@ def main(opentsdb_endpoint, time_start, **options): statistics = _DumpStatistics(rest_statistics) http_session = requests.Session() - http_session.hooks['response'].append( - utils.ResponseStatisticsHook(rest_statistics)) + http_session.hooks['response'].append(utils.ResponseStatisticsHook(rest_statistics)) client = stats_client.OpenTSDBStatsClient(http_session, opentsdb_endpoint) - all_metrics_iterator = stats_client.OpenTSDBMetricsList( - http_session, opentsdb_endpoint, prefix=prefix) + all_metrics_iterator = stats_client.OpenTSDBMetricsList(http_session, opentsdb_endpoint, prefix=prefix) for metric in all_metrics_iterator: - dump( - statistics, client, dump_frame, dump_dir, metric, query_frame_size, - need_remove_meta=need_remove_meta) + dump(statistics, client, dump_frame, dump_dir, metric, query_frame_size, need_remove_meta=need_remove_meta) -def dump( - statistics, client, dump_frame, dump_location, metric_name, - query_frame_size, need_remove_meta): +def dump(statistics, client, dump_frame, dump_location, metric_name, query_frame_size, need_remove_meta): meta = _DumpMetadata(metric_name, dump_location) try: diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/restore.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/restore.py index bcaca546f37..6a435fea529 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/restore.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/restore.py @@ -18,12 +18,20 @@ '--dump-dir', type=click.types.Path(file_okay=False), default='.', help='Location where dump files are stored') @click.option( - '--put-request-size-limit', type=int, default=4096, + '--request-size-limit', type=int, default=4096, show_default=True, help='Limit for "put" request payload size (bytes)') @click.argument('opentsdb_endpoint') def main(opentsdb_endpoint, **options): + """ + This tool restore the data to an OpenTSDB + + OPENTSDB_ENDPOINT openTSDB endpoint + + Example: + kilda-otsdb-restore http://example.com:4242 + """ dump_dir = pathlib.Path(options['dump_dir']) - batch_size_limit = options['put_request_size_limit'] + batch_size_limit = options['request_size_limit'] http_session = requests.Session() @@ -44,7 +52,7 @@ def main(opentsdb_endpoint, **options): metric_report = None try: - for batch, descriptors in stream: + for _, descriptors in stream: for d in descriptors: metadata = d.stream_entry.metadata metadata.write(d.offset_end) @@ -63,11 +71,9 @@ def main(opentsdb_endpoint, **options): def stream_source(path): - patters = ( - re.compile(r'^(?P.*)\.ndjson$', re.IGNORECASE),) - + patterns = (re.compile(r'^(?P.*)\.ndjson$', re.IGNORECASE),) for entry in path.iterdir(): - for p in patters: + for p in patterns: m = p.match(entry.name) if m is None: continue @@ -266,8 +272,7 @@ def _format_message(self): percent = descriptor.offset_end / one_percent else: percent = None - if (descriptor.stream_size - and descriptor.stream_size <= descriptor.offset_end): + if (descriptor.stream_size and 
descriptor.stream_size <= descriptor.offset_end): percent = 100 if percent is None: From 0e434db97a0cad0477e340ffb66b3fb0f4766949 Mon Sep 17 00:00:00 2001 From: Pablo Murillo Date: Thu, 23 Mar 2023 09:50:12 +0100 Subject: [PATCH 03/27] Update README.md OpenTSDB migration tool This commit explains how to use the opentsdb dump and restore tool. --- tools/otsdb-dump-restore/README.md | 112 ++++++++++++++++++++++++++++- 1 file changed, 111 insertions(+), 1 deletion(-) diff --git a/tools/otsdb-dump-restore/README.md b/tools/otsdb-dump-restore/README.md index 10ef1dd0264..99efa57c590 100644 --- a/tools/otsdb-dump-restore/README.md +++ b/tools/otsdb-dump-restore/README.md @@ -1 +1,111 @@ -# OpenKilda TSDB dump/restore tools +# OpenKilda TSDB dump/restore tool + +This packet contains python package that provide `kilda-otsdb-dump-restore` CLI tool. The tool itself +provides following commands: +* `kilda-otsdb-dump` download data from an OpenTSDB endpoint. +* `kilda-otsdb-restore` restore data to an OpenTSDB capable database. + +## Build +- `make prod` builds a container image of the application and label it with kilda-otsdb-dump-restore. +- `make devel` builds a container and start an interactive shell inside it, allowing the app directory to be attached as a volume inside the container. +This means that any changes made to the application code will be available inside the container without the need for a restart or rebuild, making it a useful tool for debugging. + +## Usage +The tool is inside a Docker container. To use the it, run the following command in your terminal: +``` +docker run kilda-otsdb-dump-restore +``` +Note: if you need to run it in localhost, you need to add the `--network="host"` docker flag. + +### Dump data from OpenTSDB: +Use `kilda-otsdb-dump` command to dump data from an OpenTSDB. +``` +Usage: kilda-otsdb-dump [OPTIONS] OPENTSDB_ENDPOINT TIME_START + + This tool dumps the data from an OpenTSDB + + OPENTSDB_ENDPOINT openTSDB endpoint + + TIME_START time since the data is dumped + + Example: kilda-otsdb-dump http://example.com:4242 2023-03-08 + +Options: + --time-stop TIME_STOP Timestamp where to stop dumping [default: NOW] + --dump-dir DIRECTORY Location where dump files will be stored + --query-frame-size INTEGER OpenTSDB query time frame size (seconds) + [default: 180] + --metrics-prefix TEXT Only metrics that match this prefix will be + dumped [default: kilda.] + --remove-metadata + --help Show this message and exit. +``` + +### Restore data to OpenTSDB: +Use `kilda-otsdb-restore` command to restore data previously dumped to an OpenTSDB. +``` +Usage: kilda-otsdb-restore [OPTIONS] OPENTSDB_ENDPOINT + + This tool restore the data to an OpenTSDB + + OPENTSDB_ENDPOINT openTSDB endpoint + + Example: kilda-otsdb-restore http://example.com:4242 + +Options: + --dump-dir DIRECTORY Location where dump files are stored + --request-size-limit INTEGER Limit for "put" request payload size (bytes) + [default: 4096] + --help Show this message and exit. +``` +### Date and time formats +``` +%Y-%m-%d, %Y-%m-%dT%H:%M:%S, %Y-%m-%d %H:%M:%S +``` +- "2023-03-22" + +- "2023-03-22T23:59:59" + +- "2023-03-22 23:59:59" + +### Example of use: +Scenario: +* One OpenTSDB service that has stored data. +* One empty VictoriaMetrics service with OpenTSDB capabilites. +* We want to migrate the data from the OpenTSDB to the VictoriaMetric service. + +This could be a posible workflow: +1. Create volume to store data +```bash +docker volume create opentsdb-data +``` +2. 
Next, we need to dump the data from OpenTSDB. The following command gets the data from a specified time until now and save it to disk: +```bash +docker run --rm -v opentsdb-data:/tmp kilda-otsdb-dump-restore kilda-otsdb-dump http://opentsdb:4242 "2023-03-08" +``` +3. After dumping the data, we can restore it using the following command: +```bash +docker run --rm -v opentsdb-data:/tmp kilda-otsdb-dump-restore kilda-otsdb-restore http://victoria:4242 +``` +4. Finally, we can remove the volume using the following command: +```bash +docker volume remove opentsdb-data +``` + +Another approach would be to use a loop that iterates over a time range, such as days. This method could be beneficial when migrating a large amount of data. +1. Create volume to store data +```bash +docker volume create opentsdb-data +``` +2. Next, we need to dump the data from OpenTSDB. Use the following command to get the data from a specified time until the --time-stop time and save it to disk: +```bash +docker run --rm -v opentsdb-data:/tmp kilda-otsdb-dump-restore kilda-otsdb-dump --time-stop "2023-03-22T11:00:00" http://opentsdb:4242 "2023-03-22T00:00:00" +``` +3. After dumping the data, we can restore it using the following command: +```bash +docker run --rm -v opentsdb-data:/tmp kilda-otsdb-dump-restore kilda-otsdb-restore http://victoria:4242 +``` +4. Finally, we can remove the volume using the following command: +```bash +docker volume remove opentsdb-data +``` From 36fdee3f46c59cac750e0da2435c201822f93aaf Mon Sep 17 00:00:00 2001 From: Pablo Murillo Date: Thu, 1 Jun 2023 15:44:51 +0200 Subject: [PATCH 04/27] add script to migrate data --- tools/otsdb-dump-restore/otsdb-to-vm.sh | 79 +++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100755 tools/otsdb-dump-restore/otsdb-to-vm.sh diff --git a/tools/otsdb-dump-restore/otsdb-to-vm.sh b/tools/otsdb-dump-restore/otsdb-to-vm.sh new file mode 100755 index 00000000000..59a10343015 --- /dev/null +++ b/tools/otsdb-dump-restore/otsdb-to-vm.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +# ./otsdb-to-vm.sh opentsdb.example.com:4242 victoria-metrics.example.com:8428 2022-01-01 2022-01-31 rigel. day +# This command will migrate data from OpenTSDB running at "opentsdb.example.com:4242" +#to Victoria Metrics running at "victoria-metrics.example.com:8428" for the time period +#between January 1, 2022 and January 31, 2022, using the metrics prefix "my-metrics-prefix" +#and a time interval of one day. You can customize the command by replacing the parameters +#with your own values. 
+ +# Check for required parameters +# if [ "$#" -lt 5 ] || [ "$#" -gt 6 ]; then +# echo "Usage: $0 [hour|day]" +# exit 1 +# fi + + +# Set parameters +opentsdb_endpoint="$1" +victoria_metrics_endpoint="$2" +start_date="$3" +end_date="$4" +metrics_prefix="$5" +interval="${6:-day}" + +# Set time interval +case $interval in + hour) + interval_format="%Y-%m-%dT%H:00:00" + increment="+1H" + ;; + day) + interval_format="%Y-%m-%d" + increment="+1d" + ;; + *) + echo "Invalid interval: $interval" + exit 1 + ;; +esac + + + +# Define function to dump data from OpenTSDB +function dump_data { + docker run --rm -v "opentsdb-data-${5}":/tmp kilda-otsdb-dump-restore kilda-otsdb-dump --metrics-prefix "${2}" --time-stop "${3}" "${4}" "${1}" +} + +# Define function to restore data to Victoria Metrics +function restore_data { + docker run --rm -v "opentsdb-data-${2}":/tmp kilda-otsdb-dump-restore kilda-otsdb-restore "${1}" && docker volume rm "opentsdb-data-${2}" || echo "Failed to restore data to Victoria Metrics" >&2 +} + +function increment_date() +{ + local __resultvar=$1 + eval $__resultvar=$(date -j -v ${increment} -f "${interval_format}" "${start_date}" +"${interval_format}") +} + +# Loop through dates +while [[ "$start_date" < "$end_date" ]]; do + # trim : from date + volume_subfix=$(echo "${start_date}" | tr -d :) + # Create Docker volume for this iteration + docker volume create "opentsdb-data-${volume_subfix}" + + # Calculate end date for this iteration + increment_date interval_end_date + + echo "Dumping data from ${start_date} to ${interval_end_date}" + dump_data "${start_date}" "${metrics_prefix}" "${interval_end_date}" "${opentsdb_endpoint}" "${volume_subfix}" + + echo "Restoring data from ${start_date} to ${interval_end_date} in background" + restore_data "${victoria_metrics_endpoint}" "${volume_subfix}" & + + # Increment date by time interval + increment_date start_date +done + +wait \ No newline at end of file From f1a698a669d8ce4b5fb84b35792a6232e638dd12 Mon Sep 17 00:00:00 2001 From: Pablo Murillo Nogales Date: Thu, 1 Jun 2023 14:39:27 +0000 Subject: [PATCH 05/27] fix date command to work in linux --- tools/otsdb-dump-restore/otsdb-to-vm.sh | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/otsdb-dump-restore/otsdb-to-vm.sh b/tools/otsdb-dump-restore/otsdb-to-vm.sh index 59a10343015..a06f357f4b6 100755 --- a/tools/otsdb-dump-restore/otsdb-to-vm.sh +++ b/tools/otsdb-dump-restore/otsdb-to-vm.sh @@ -26,11 +26,11 @@ interval="${6:-day}" case $interval in hour) interval_format="%Y-%m-%dT%H:00:00" - increment="+1H" + increment="1 hour" ;; day) interval_format="%Y-%m-%d" - increment="+1d" + increment="1 days" ;; *) echo "Invalid interval: $interval" @@ -39,23 +39,28 @@ case $interval in esac +if [[ "$(docker images -q kilda-otsdb-dump-restore 2> /dev/null)" == "" ]]; then + echo "Docker image kilda-otsdb-dump-restore not found. Please build it first." 
>&2 + exit 1 +fi # Define function to dump data from OpenTSDB function dump_data { - docker run --rm -v "opentsdb-data-${5}":/tmp kilda-otsdb-dump-restore kilda-otsdb-dump --metrics-prefix "${2}" --time-stop "${3}" "${4}" "${1}" + docker run --rm --network="host" -v "opentsdb-data-${5}":/tmp kilda-otsdb-dump-restore kilda-otsdb-dump --metrics-prefix "${2}" --time-stop "${3}" "${4}" "${1}" } # Define function to restore data to Victoria Metrics function restore_data { - docker run --rm -v "opentsdb-data-${2}":/tmp kilda-otsdb-dump-restore kilda-otsdb-restore "${1}" && docker volume rm "opentsdb-data-${2}" || echo "Failed to restore data to Victoria Metrics" >&2 + docker run --rm --network="host" -v "opentsdb-data-${2}":/tmp kilda-otsdb-dump-restore kilda-otsdb-restore "${1}" && docker volume rm "opentsdb-data-${2}" || echo "Failed to restore data to Victoria Metrics" >&2 } function increment_date() { local __resultvar=$1 - eval $__resultvar=$(date -j -v ${increment} -f "${interval_format}" "${start_date}" +"${interval_format}") + eval $__resultvar=$(date -d "${start_date} ${increment}" +${interval_format}) } + # Loop through dates while [[ "$start_date" < "$end_date" ]]; do # trim : from date From 4fd4596e07b907f8c97faf8a48a354c01bc80100 Mon Sep 17 00:00:00 2001 From: Pablo Murillo Date: Mon, 5 Jun 2023 11:59:00 +0200 Subject: [PATCH 06/27] add otsdb-to-vm script to README --- tools/otsdb-dump-restore/README.md | 23 ++++++++++++++++++ tools/otsdb-dump-restore/otsdb-to-vm.sh | 31 +++++++++++++++++++------ 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/tools/otsdb-dump-restore/README.md b/tools/otsdb-dump-restore/README.md index 99efa57c590..cadaa83f969 100644 --- a/tools/otsdb-dump-restore/README.md +++ b/tools/otsdb-dump-restore/README.md @@ -109,3 +109,26 @@ docker run --rm -v opentsdb-data:/tmp kilda-otsdb-dump-restore kilda-otsdb-resto ```bash docker volume remove opentsdb-data ``` + +### How to use the otsdb-to-vm script +The otsdb-to-vm script is a wrapper around the kilda-otsdb-dump-restore tool. It is used to dump data from an OpenTSDB and restore it to a VictoriaMetrics service. +```bash +Usage: otsdb-to-vm OPENTSDB_ENDPOINT VICTORIA_ENDPOINT TIME_START TIME_STOP [hour|day] + + This tool dumps the data from an OpenTSDB and restore it to a VictoriaMetrics service. + + OPENTSDB_ENDPOINT openTSDB endpoint + + VICTORIA_ENDPOINT VictoriaMetrics endpoint + + TIME_START time since the data is dumped + + DATE_STOP time where to stop dumping + + [hour|day] time frame size + + Examples: + + ./otsdb-to-vm.sh opentsdb.example.com:4242 victoria-metrics.example.com:4242 2022-01-01 2022-01-31 kilda. day + ./otsdb-to-vm.sh opentsdb.example.com:4242 victoria-metrics.example.com:4242 2022-01-01T00:00:00 2022-01-01T23:59:59 kilda. hour +``` diff --git a/tools/otsdb-dump-restore/otsdb-to-vm.sh b/tools/otsdb-dump-restore/otsdb-to-vm.sh index a06f357f4b6..42cd0c773be 100755 --- a/tools/otsdb-dump-restore/otsdb-to-vm.sh +++ b/tools/otsdb-dump-restore/otsdb-to-vm.sh @@ -1,17 +1,34 @@ #!/bin/bash -# ./otsdb-to-vm.sh opentsdb.example.com:4242 victoria-metrics.example.com:8428 2022-01-01 2022-01-31 rigel. day +# ./otsdb-to-vm.sh opentsdb.example.com:4242 victoria-metrics.example.com:4242 2022-01-01 2022-01-31 kilda. 
day # This command will migrate data from OpenTSDB running at "opentsdb.example.com:4242" -#to Victoria Metrics running at "victoria-metrics.example.com:8428" for the time period +#to Victoria Metrics running at "victoria-metrics.example.com:4242" for the time period #between January 1, 2022 and January 31, 2022, using the metrics prefix "my-metrics-prefix" #and a time interval of one day. You can customize the command by replacing the parameters #with your own values. # Check for required parameters -# if [ "$#" -lt 5 ] || [ "$#" -gt 6 ]; then -# echo "Usage: $0 [hour|day]" -# exit 1 -# fi +if [ "$#" -lt 5 ] || [ "$#" -gt 6 ]; then + echo "Usage: $0 OPENTSDB_ENDPOINT VICTORIA_ENDPOINT TIME_START TIME_STOP [hour|day] + + This tool dumps the data from an OpenTSDB and restore it to a VictoriaMetrics service. + + OPENTSDB_ENDPOINT openTSDB endpoint + + VICTORIA_ENDPOINT VictoriaMetrics endpoint + + TIME_START time since the data is dumped + + DATE_STOP time where to stop dumping + + [hour|day] time frame size + + Examples: + + ./otsdb-to-vm.sh opentsdb.example.com:4242 victoria-metrics.example.com:4242 2022-01-01 2022-01-31 kilda. day + ./otsdb-to-vm.sh opentsdb.example.com:4242 victoria-metrics.example.com:4242 2022-01-01T00:00:00 2022-01-01T23:59:59 kilda. hour" + exit 1 +fi # Set parameters @@ -81,4 +98,4 @@ while [[ "$start_date" < "$end_date" ]]; do increment_date start_date done -wait \ No newline at end of file +wait From de490afb6abdf809a5fd8f6cb0220575d049e5b1 Mon Sep 17 00:00:00 2001 From: Pablo Murillo Date: Tue, 6 Jun 2023 14:10:49 +0200 Subject: [PATCH 07/27] add retries for http, fix group check for devel --- tools/otsdb-dump-restore/build-tools/setup-devel-user.sh | 3 ++- tools/otsdb-dump-restore/otsdb-to-vm.sh | 2 +- .../otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py | 6 ++++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/otsdb-dump-restore/build-tools/setup-devel-user.sh b/tools/otsdb-dump-restore/build-tools/setup-devel-user.sh index 537ff0d2b95..2d8e8a62c49 100755 --- a/tools/otsdb-dump-restore/build-tools/setup-devel-user.sh +++ b/tools/otsdb-dump-restore/build-tools/setup-devel-user.sh @@ -13,9 +13,10 @@ if [ ${DEVEL_UID} -eq 0 ]; then exit fi -if [ -z "$(getent group "${DEVEL_NAME}")" ]; then +if [ -z "$(getent group "${DEVEL_GID}")" ]; then groupadd -g "${DEVEL_GID}" "${DEVEL_NAME}" fi + if [ -z "$(getent passwd "${DEVEL_NAME}")" ]; then useradd -m -u "${DEVEL_UID}" -g "${DEVEL_GID}" -s /bin/bash "${DEVEL_NAME}" fi diff --git a/tools/otsdb-dump-restore/otsdb-to-vm.sh b/tools/otsdb-dump-restore/otsdb-to-vm.sh index 42cd0c773be..3e23ca11732 100755 --- a/tools/otsdb-dump-restore/otsdb-to-vm.sh +++ b/tools/otsdb-dump-restore/otsdb-to-vm.sh @@ -92,7 +92,7 @@ while [[ "$start_date" < "$end_date" ]]; do dump_data "${start_date}" "${metrics_prefix}" "${interval_end_date}" "${opentsdb_endpoint}" "${volume_subfix}" echo "Restoring data from ${start_date} to ${interval_end_date} in background" - restore_data "${victoria_metrics_endpoint}" "${volume_subfix}" & + # restore_data "${victoria_metrics_endpoint}" "${volume_subfix}" & # Increment date by time interval increment_date start_date diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py index 8a912c0a43a..7123f24df4f 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py @@ -6,6 +6,8 @@ import click import ndjson import requests 
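+# Retry/HTTPAdapter (imported below) are mounted on the HTTP session so that
+# requests failing with the listed status codes (HTTP 424 here) are retried
+# with an increasing back-off instead of aborting the dump.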
+from requests.adapters import HTTPAdapter, Retry + from kilda.tsdb_dump_restore import mapping from kilda.tsdb_dump_restore import stats_client @@ -62,6 +64,10 @@ def main(opentsdb_endpoint, time_start, **options): http_session = requests.Session() http_session.hooks['response'].append(utils.ResponseStatisticsHook(rest_statistics)) + retries = Retry(total=5, backoff_factor=0.5, status_forcelist=[424]) + + http_session.mount('http://', HTTPAdapter(max_retries=retries)) + client = stats_client.OpenTSDBStatsClient(http_session, opentsdb_endpoint) all_metrics_iterator = stats_client.OpenTSDBMetricsList(http_session, opentsdb_endpoint, prefix=prefix) From 547c3375f10fe3349d32142e68a31eeeef3b444a Mon Sep 17 00:00:00 2001 From: Pablo Murillo Date: Thu, 8 Jun 2023 18:32:05 +0200 Subject: [PATCH 08/27] add concurrency and use csv instead of json --- .../src/kilda/tsdb_dump_restore/constants.py | 6 +++ .../src/kilda/tsdb_dump_restore/dump.py | 24 +++++++++-- .../src/kilda/tsdb_dump_restore/mapping.py | 41 +++++++++++++------ .../src/kilda/tsdb_dump_restore/restore.py | 4 +- 4 files changed, 57 insertions(+), 18 deletions(-) create mode 100644 tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/constants.py diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/constants.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/constants.py new file mode 100644 index 00000000000..ac9700669ed --- /dev/null +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/constants.py @@ -0,0 +1,6 @@ +# constants.py + +TIMESTAMP_FIELD = 'timestamp' +METRIC_NAME_FIELD = 'metric_name' +TAGS_FIELD = 'tags' +VALUE_FIELD = 'value' \ No newline at end of file diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py index 7123f24df4f..1b4d8283d62 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py @@ -2,11 +2,14 @@ import datetime import pathlib +import concurrent.futures import click import ndjson +import csv import requests from requests.adapters import HTTPAdapter, Retry +from threading import Thread from kilda.tsdb_dump_restore import mapping @@ -33,6 +36,9 @@ help='Only metrics that match this prefix will be dumped') @click.option( '--remove-metadata', is_flag=True) +@click.option( + '--concurrent', type=int, default=1, show_default=True, + help='Number of concurrent threads') @click.argument('opentsdb_endpoint') @click.argument( 'time_start', type=click.types.DateTime(), metavar='TIME_START') @@ -47,6 +53,7 @@ def main(opentsdb_endpoint, time_start, **options): Example: kilda-otsdb-dump http://example.com:4242 2023-03-08 """ + time_start = time_start.astimezone(datetime.timezone.utc) time_stop = options['time_stop'].astimezone(datetime.timezone.utc) @@ -54,6 +61,7 @@ def main(opentsdb_endpoint, time_start, **options): query_frame_size = datetime.timedelta(seconds=options['query_frame_size']) prefix = options['metrics_prefix'] need_remove_meta = options['remove_metadata'] + concurrent_executors = options['concurrent'] dump_dir.mkdir(exist_ok=True, parents=True) dump_frame = _TimeFrame(time_start, time_stop) @@ -69,11 +77,19 @@ def main(opentsdb_endpoint, time_start, **options): http_session.mount('http://', HTTPAdapter(max_retries=retries)) client = stats_client.OpenTSDBStatsClient(http_session, opentsdb_endpoint) - all_metrics_iterator = stats_client.OpenTSDBMetricsList(http_session, opentsdb_endpoint, prefix=prefix) - for metric 
in all_metrics_iterator: - dump(statistics, client, dump_frame, dump_dir, metric, query_frame_size, need_remove_meta=need_remove_meta) + concurrent_dump(all_metrics_iterator, statistics, client, dump_frame, dump_dir, query_frame_size, need_remove_meta, concurrent_executors) + +def concurrent_dump(all_metrics_iterator, statistics, client, dump_frame, dump_dir, query_frame_size, need_remove_meta, concurrent_executors): + with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_executors) as executor: + futures = [] + for metric in all_metrics_iterator: + futures.append(executor.submit(dump, statistics, client, dump_frame, dump_dir, metric, query_frame_size, need_remove_meta)) + # concurrent.futures.wait(futures) + concurrent.futures.as_completed(futures) + # for _ in concurrent.futures.as_completed(futures): + # print("OK") def dump(statistics, client, dump_frame, dump_location, metric_name, query_frame_size, need_remove_meta): meta = _DumpMetadata(metric_name, dump_location) @@ -107,7 +123,7 @@ def dump(statistics, client, dump_frame, dump_location, metric_name, query_frame def _dump_stream(stream, target, meta, status_report, statistics): - writer = ndjson.writer(target) + writer = mapping.get_csv_writer(target) for frame, stats_entries in stream: status_report.flush() for entry in stats_entries: diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/mapping.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/mapping.py index 45e0c5746a1..051a346c042 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/mapping.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/mapping.py @@ -1,8 +1,11 @@ # mapping.py +from kilda.tsdb_dump_restore import constants from kilda.tsdb_dump_restore import stats_client from kilda.tsdb_dump_restore import utils +import csv +import ast def encode_stats_entry(entry): result = {} @@ -14,15 +17,22 @@ def encode_stats_entry(entry): return result - -def decode_stats_entry(raw): - timestamp = _timestamp_adapter.read(raw) - name = _metric_name_adapter.read(raw) - tags = _tags_adapter.read(raw) - value = _value_adapter.read(raw) - +def get_csv_writer(file): + return csv.DictWriter( + file, + fieldnames=_fieldnames, + delimiter='|', + extrasaction='ignore') + +def decode_raw_cvs_row(raw): + row = raw.decode('utf-8') + row = row.split(_delimiter) + timestamp = float(row[0]) + metric_name = row[1] + tags = ast.literal_eval(row[2]) + value = int(row[3]) return stats_client.StatsEntry( - utils.unixtime_to_datetime(timestamp), name, value, tags=tags) + utils.unixtime_to_datetime(timestamp), metric_name, value, tags=tags) class Adapter: @@ -36,7 +46,14 @@ def read(self, target): return target[self._field] -_timestamp_adapter = Adapter('timestamp') -_metric_name_adapter = Adapter('metric') -_tags_adapter = Adapter('tags') -_value_adapter = Adapter('value') +_timestamp_adapter = Adapter(constants.TIMESTAMP_FIELD) +_metric_name_adapter = Adapter(constants.METRIC_NAME_FIELD) +_tags_adapter = Adapter(constants.TAGS_FIELD) +_value_adapter = Adapter(constants.VALUE_FIELD) +_fieldnames=[ + constants.TIMESTAMP_FIELD, + constants.METRIC_NAME_FIELD, + constants.TAGS_FIELD, + constants.VALUE_FIELD, + ] +_delimiter='|' \ No newline at end of file diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/restore.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/restore.py index 6a435fea529..205dd231411 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/restore.py +++ 
b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/restore.py @@ -4,6 +4,7 @@ import pathlib import re import json +import csv import click import requests @@ -119,8 +120,7 @@ def decode_stream_map(stream): for stream_entry, record, offset, size, total_size in stream: if not record: continue - data = json.loads(record) - entry = mapping.decode_stats_entry(data) + entry = mapping.decode_raw_cvs_row(record) yield stream_entry, entry, offset, size, total_size From 17b08c5ae9b60e799609aaaf17e354e306aab078 Mon Sep 17 00:00:00 2001 From: Pablo Murillo Date: Fri, 9 Jun 2023 09:15:14 +0200 Subject: [PATCH 09/27] fix style --- .../src/kilda/tsdb_dump_restore/constants.py | 2 +- .../src/kilda/tsdb_dump_restore/dump.py | 15 ++++++++------- .../src/kilda/tsdb_dump_restore/mapping.py | 7 +++++-- .../src/kilda/tsdb_dump_restore/restore.py | 1 - 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/constants.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/constants.py index ac9700669ed..abeffa62f4f 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/constants.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/constants.py @@ -3,4 +3,4 @@ TIMESTAMP_FIELD = 'timestamp' METRIC_NAME_FIELD = 'metric_name' TAGS_FIELD = 'tags' -VALUE_FIELD = 'value' \ No newline at end of file +VALUE_FIELD = 'value' diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py index 1b4d8283d62..b2bfd23b344 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py @@ -5,12 +5,8 @@ import concurrent.futures import click -import ndjson -import csv import requests from requests.adapters import HTTPAdapter, Retry -from threading import Thread - from kilda.tsdb_dump_restore import mapping from kilda.tsdb_dump_restore import stats_client @@ -79,18 +75,23 @@ def main(opentsdb_endpoint, time_start, **options): client = stats_client.OpenTSDBStatsClient(http_session, opentsdb_endpoint) all_metrics_iterator = stats_client.OpenTSDBMetricsList(http_session, opentsdb_endpoint, prefix=prefix) - concurrent_dump(all_metrics_iterator, statistics, client, dump_frame, dump_dir, query_frame_size, need_remove_meta, concurrent_executors) + concurrent_dump(all_metrics_iterator, statistics, client, dump_frame, + dump_dir, query_frame_size, need_remove_meta, concurrent_executors) + -def concurrent_dump(all_metrics_iterator, statistics, client, dump_frame, dump_dir, query_frame_size, need_remove_meta, concurrent_executors): +def concurrent_dump(all_metrics_iterator, statistics, client, dump_frame, dump_dir, + query_frame_size, need_remove_meta, concurrent_executors): with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_executors) as executor: futures = [] for metric in all_metrics_iterator: - futures.append(executor.submit(dump, statistics, client, dump_frame, dump_dir, metric, query_frame_size, need_remove_meta)) + futures.append(executor.submit(dump, statistics, client, dump_frame, + dump_dir, metric, query_frame_size, need_remove_meta)) # concurrent.futures.wait(futures) concurrent.futures.as_completed(futures) # for _ in concurrent.futures.as_completed(futures): # print("OK") + def dump(statistics, client, dump_frame, dump_location, metric_name, query_frame_size, need_remove_meta): meta = _DumpMetadata(metric_name, dump_location) diff --git 
a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/mapping.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/mapping.py index 051a346c042..2d6293d1ce6 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/mapping.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/mapping.py @@ -7,6 +7,7 @@ import csv import ast + def encode_stats_entry(entry): result = {} _timestamp_adapter.write( @@ -17,6 +18,7 @@ def encode_stats_entry(entry): return result + def get_csv_writer(file): return csv.DictWriter( file, @@ -24,6 +26,7 @@ def get_csv_writer(file): delimiter='|', extrasaction='ignore') + def decode_raw_cvs_row(raw): row = raw.decode('utf-8') row = row.split(_delimiter) @@ -50,10 +53,10 @@ def read(self, target): _metric_name_adapter = Adapter(constants.METRIC_NAME_FIELD) _tags_adapter = Adapter(constants.TAGS_FIELD) _value_adapter = Adapter(constants.VALUE_FIELD) -_fieldnames=[ +_fieldnames = [ constants.TIMESTAMP_FIELD, constants.METRIC_NAME_FIELD, constants.TAGS_FIELD, constants.VALUE_FIELD, ] -_delimiter='|' \ No newline at end of file +_delimiter = '|' diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/restore.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/restore.py index 205dd231411..629abd0dfdd 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/restore.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/restore.py @@ -4,7 +4,6 @@ import pathlib import re import json -import csv import click import requests From cbf21e48c19b9b7c6121e128ed815e43e5dae6d6 Mon Sep 17 00:00:00 2001 From: Pablo Murillo Date: Fri, 9 Jun 2023 09:29:25 +0200 Subject: [PATCH 10/27] add concurrent option to the script --- tools/otsdb-dump-restore/otsdb-to-vm.sh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tools/otsdb-dump-restore/otsdb-to-vm.sh b/tools/otsdb-dump-restore/otsdb-to-vm.sh index 3e23ca11732..efe10282607 100755 --- a/tools/otsdb-dump-restore/otsdb-to-vm.sh +++ b/tools/otsdb-dump-restore/otsdb-to-vm.sh @@ -8,8 +8,8 @@ #with your own values. # Check for required parameters -if [ "$#" -lt 5 ] || [ "$#" -gt 6 ]; then - echo "Usage: $0 OPENTSDB_ENDPOINT VICTORIA_ENDPOINT TIME_START TIME_STOP [hour|day] +if [[ $# -lt 4 ]]; then + echo "Usage: $0 OPENTSDB_ENDPOINT VICTORIA_ENDPOINT TIME_START TIME_STOP [hour|day] CONCURRENT_JOBS This tool dumps the data from an OpenTSDB and restore it to a VictoriaMetrics service. @@ -21,7 +21,9 @@ if [ "$#" -lt 5 ] || [ "$#" -gt 6 ]; then DATE_STOP time where to stop dumping - [hour|day] time frame size + [hour|day] time frame size to dump data. Default is day. + + CONCURRENT_JOBS number of concurrent jobs to run. Default is 1. 
Examples: @@ -38,6 +40,7 @@ start_date="$3" end_date="$4" metrics_prefix="$5" interval="${6:-day}" +concurrent_jobs="${7:-1}" # Set time interval case $interval in @@ -63,7 +66,7 @@ fi # Define function to dump data from OpenTSDB function dump_data { - docker run --rm --network="host" -v "opentsdb-data-${5}":/tmp kilda-otsdb-dump-restore kilda-otsdb-dump --metrics-prefix "${2}" --time-stop "${3}" "${4}" "${1}" + docker run --rm --network="host" -v "opentsdb-data-${5}":/tmp kilda-otsdb-dump-restore kilda-otsdb-dump --concurrent "${5}" --metrics-prefix "${2}" --time-stop "${3}" "${4}" "${1}" } # Define function to restore data to Victoria Metrics @@ -89,7 +92,7 @@ while [[ "$start_date" < "$end_date" ]]; do increment_date interval_end_date echo "Dumping data from ${start_date} to ${interval_end_date}" - dump_data "${start_date}" "${metrics_prefix}" "${interval_end_date}" "${opentsdb_endpoint}" "${volume_subfix}" + dump_data "${start_date}" "${metrics_prefix}" "${interval_end_date}" "${opentsdb_endpoint}" "${volume_subfix}" "${concurrent_jobs}" echo "Restoring data from ${start_date} to ${interval_end_date} in background" # restore_data "${victoria_metrics_endpoint}" "${volume_subfix}" & From 3e5d02784390703c2d87ef0cc2b812e837dddf44 Mon Sep 17 00:00:00 2001 From: Pablo Murillo Nogales Date: Fri, 9 Jun 2023 07:33:54 +0000 Subject: [PATCH 11/27] fix concurrent in script --- tools/otsdb-dump-restore/otsdb-to-vm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/otsdb-dump-restore/otsdb-to-vm.sh b/tools/otsdb-dump-restore/otsdb-to-vm.sh index efe10282607..f74ab8d5fba 100755 --- a/tools/otsdb-dump-restore/otsdb-to-vm.sh +++ b/tools/otsdb-dump-restore/otsdb-to-vm.sh @@ -66,7 +66,7 @@ fi # Define function to dump data from OpenTSDB function dump_data { - docker run --rm --network="host" -v "opentsdb-data-${5}":/tmp kilda-otsdb-dump-restore kilda-otsdb-dump --concurrent "${5}" --metrics-prefix "${2}" --time-stop "${3}" "${4}" "${1}" + docker run --rm --network="host" -v "opentsdb-data-${5}":/tmp kilda-otsdb-dump-restore kilda-otsdb-dump --concurrent "${6}" --metrics-prefix "${2}" --time-stop "${3}" "${4}" "${1}" } # Define function to restore data to Victoria Metrics From 3df5ea67418aa6455bb07aa240487b47b363e8ee Mon Sep 17 00:00:00 2001 From: Pablo Murillo Date: Mon, 12 Jun 2023 09:26:14 +0200 Subject: [PATCH 12/27] add query frame size to the script --- tools/otsdb-dump-restore/otsdb-to-vm.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/otsdb-dump-restore/otsdb-to-vm.sh b/tools/otsdb-dump-restore/otsdb-to-vm.sh index f74ab8d5fba..0be9e05a86c 100755 --- a/tools/otsdb-dump-restore/otsdb-to-vm.sh +++ b/tools/otsdb-dump-restore/otsdb-to-vm.sh @@ -9,7 +9,7 @@ # Check for required parameters if [[ $# -lt 4 ]]; then - echo "Usage: $0 OPENTSDB_ENDPOINT VICTORIA_ENDPOINT TIME_START TIME_STOP [hour|day] CONCURRENT_JOBS + echo "Usage: $0 OPENTSDB_ENDPOINT VICTORIA_ENDPOINT TIME_START TIME_STOP [hour|day] CONCURRENT_JOBS QUERY_FRAME_SIZE This tool dumps the data from an OpenTSDB and restore it to a VictoriaMetrics service. @@ -25,6 +25,8 @@ if [[ $# -lt 4 ]]; then CONCURRENT_JOBS number of concurrent jobs to run. Default is 1. + QUERY_FRAME_SIZE number of data points to query from OpenTSDB at once. Default is 180. + Examples: ./otsdb-to-vm.sh opentsdb.example.com:4242 victoria-metrics.example.com:4242 2022-01-01 2022-01-31 kilda. 
day @@ -41,6 +43,7 @@ end_date="$4" metrics_prefix="$5" interval="${6:-day}" concurrent_jobs="${7:-1}" +query_frame_size="${8:-180}" # Set time interval case $interval in @@ -66,7 +69,7 @@ fi # Define function to dump data from OpenTSDB function dump_data { - docker run --rm --network="host" -v "opentsdb-data-${5}":/tmp kilda-otsdb-dump-restore kilda-otsdb-dump --concurrent "${6}" --metrics-prefix "${2}" --time-stop "${3}" "${4}" "${1}" + docker run --rm --network="host" -v "opentsdb-data-${5}":/tmp kilda-otsdb-dump-restore kilda-otsdb-dump --query-frame-size "${7}" --concurrent "${6}" --metrics-prefix "${2}" --time-stop "${3}" "${4}" "${1}" } # Define function to restore data to Victoria Metrics @@ -92,7 +95,7 @@ while [[ "$start_date" < "$end_date" ]]; do increment_date interval_end_date echo "Dumping data from ${start_date} to ${interval_end_date}" - dump_data "${start_date}" "${metrics_prefix}" "${interval_end_date}" "${opentsdb_endpoint}" "${volume_subfix}" "${concurrent_jobs}" + dump_data "${start_date}" "${metrics_prefix}" "${interval_end_date}" "${opentsdb_endpoint}" "${volume_subfix}" "${concurrent_jobs}" "${query_frame_size}" echo "Restoring data from ${start_date} to ${interval_end_date} in background" # restore_data "${victoria_metrics_endpoint}" "${volume_subfix}" & From 99a11f52ff0d346c0dfc0d529aacdbf816cb708b Mon Sep 17 00:00:00 2001 From: Pablo Murillo Date: Mon, 12 Jun 2023 16:38:59 +0200 Subject: [PATCH 13/27] remove unused code --- tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py index b2bfd23b344..543c609f09f 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py @@ -86,10 +86,7 @@ def concurrent_dump(all_metrics_iterator, statistics, client, dump_frame, dump_d for metric in all_metrics_iterator: futures.append(executor.submit(dump, statistics, client, dump_frame, dump_dir, metric, query_frame_size, need_remove_meta)) - # concurrent.futures.wait(futures) - concurrent.futures.as_completed(futures) - # for _ in concurrent.futures.as_completed(futures): - # print("OK") + concurrent.futures.wait(futures) def dump(statistics, client, dump_frame, dump_location, metric_name, query_frame_size, need_remove_meta): From b1ca7e07b0a6a7b141df454dbf465902b61004cb Mon Sep 17 00:00:00 2001 From: Pablo Murillo Date: Wed, 14 Jun 2023 12:09:22 +0200 Subject: [PATCH 14/27] improve error handling --- .../src/kilda/tsdb_dump_restore/dump.py | 62 +++++++++++-------- .../src/kilda/tsdb_dump_restore/report.py | 6 ++ 2 files changed, 42 insertions(+), 26 deletions(-) diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py index 543c609f09f..37864e3a3b0 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py @@ -68,8 +68,7 @@ def main(opentsdb_endpoint, time_start, **options): http_session = requests.Session() http_session.hooks['response'].append(utils.ResponseStatisticsHook(rest_statistics)) - retries = Retry(total=5, backoff_factor=0.5, status_forcelist=[424]) - + retries = Retry(total=3, status_forcelist=[424]) http_session.mount('http://', HTTPAdapter(max_retries=retries)) client = stats_client.OpenTSDBStatsClient(http_session, 
opentsdb_endpoint) @@ -86,44 +85,55 @@ def concurrent_dump(all_metrics_iterator, statistics, client, dump_frame, dump_d for metric in all_metrics_iterator: futures.append(executor.submit(dump, statistics, client, dump_frame, dump_dir, metric, query_frame_size, need_remove_meta)) - concurrent.futures.wait(futures) + exit_result = 0 + for future in concurrent.futures.as_completed(futures): + if future.result() == 1: + exit_result = 1 -def dump(statistics, client, dump_frame, dump_location, metric_name, query_frame_size, need_remove_meta): - meta = _DumpMetadata(metric_name, dump_location) + exit(exit_result) + +def dump(statistics, client, dump_frame, dump_location, metric_name, query_frame_size, need_remove_meta): try: - last_frame = meta.read() - start = last_frame.start - except ValueError: - start = utils.datetime_align(dump_frame.start, query_frame_size) + meta = _DumpMetadata(metric_name, dump_location) + + try: + last_frame = meta.read() + start = last_frame.start + except ValueError: + start = utils.datetime_align(dump_frame.start, query_frame_size) + + end = dump_frame.end - end = dump_frame.end + query_manager = _AggDataQueryManager() + statistics.evaluate_expected_iterations_count(start, end, query_frame_size) - query_manager = _AggDataQueryManager() - statistics.evaluate_expected_iterations_count(start, end, query_frame_size) + stream = build_time_stream(start, end, query_frame_size) + stream = query_data_stream(stream, query_manager) + stream = stats_stream(stream, client, metric_name, query_manager) - stream = build_time_stream(start, end, query_frame_size) - stream = query_data_stream(stream, query_manager) - stream = stats_stream(stream, client, metric_name, query_manager) + dump_file = dump_location / (metric_name + '.ndjson') + with dump_file.open('at') as target: + if 0 < target.tell(): + # extending existing file, make sure we have line separator before + # new record + target.write('\n') + with DumpProgressReport(metric_name, statistics) as status_report: + _dump_stream(stream, target, meta, status_report, statistics) - dump_file = dump_location / (metric_name + '.ndjson') - with dump_file.open('at') as target: - if 0 < target.tell(): - # extending existing file, make sure we have line separator before - # new record - target.write('\n') - with DumpProgressReport(metric_name, statistics) as status_report: - _dump_stream(stream, target, meta, status_report, statistics) + if need_remove_meta: + meta.remove() - if need_remove_meta: - meta.remove() + except Exception as e: + print('Failed to dump metric "{}": {}'.format(metric_name, e)) + return 1 def _dump_stream(stream, target, meta, status_report, statistics): writer = mapping.get_csv_writer(target) for frame, stats_entries in stream: - status_report.flush() + # status_report.flush() for entry in stats_entries: statistics.add_entry(frame, entry) writer.writerow(mapping.encode_stats_entry(entry)) diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/report.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/report.py index e310f48124e..32437e92009 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/report.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/report.py @@ -47,5 +47,11 @@ def close(self): self.stream.write('\n') self.stream.flush() + # def get_last_message(self): + # chunks = self._format_message() + # chunks = (str(x) for x in chunks if x is not None) + # chunks = (str(x) for x in chunks if x) + # return ''.join(chunks) + def _format_message(self): raise 
NotImplementedError() From ce294344d5b9a32f23b00ed1242322bf4048fc6c Mon Sep 17 00:00:00 2001 From: Pablo Murillo Date: Thu, 15 Jun 2023 17:16:56 +0200 Subject: [PATCH 15/27] extend frame time of the stats request --- tools/otsdb-dump-restore/Dockerfile.devel | 2 +- tools/otsdb-dump-restore/Dockerfile.prod | 2 +- .../build-tools/setup-devel-user.sh | 2 +- tools/otsdb-dump-restore/otsdb-to-vm.sh | 1 - .../src/kilda/tsdb_dump_restore/dump.py | 54 +++++++++---------- .../src/kilda/tsdb_dump_restore/mapping.py | 2 +- .../src/kilda/tsdb_dump_restore/report.py | 6 --- .../kilda/tsdb_dump_restore/stats_client.py | 4 ++ 8 files changed, 33 insertions(+), 40 deletions(-) diff --git a/tools/otsdb-dump-restore/Dockerfile.devel b/tools/otsdb-dump-restore/Dockerfile.devel index 827cdb2f65e..5f2f80be9da 100644 --- a/tools/otsdb-dump-restore/Dockerfile.devel +++ b/tools/otsdb-dump-restore/Dockerfile.devel @@ -1,4 +1,4 @@ -FROM python:3.10-slim +FROM python:3.11-slim ARG devel_user_name=root ARG devel_uid=0 diff --git a/tools/otsdb-dump-restore/Dockerfile.prod b/tools/otsdb-dump-restore/Dockerfile.prod index be8abcde381..af50070ebd0 100644 --- a/tools/otsdb-dump-restore/Dockerfile.prod +++ b/tools/otsdb-dump-restore/Dockerfile.prod @@ -1,4 +1,4 @@ -FROM python:3.10-slim +FROM python:3.11-slim ARG wheel diff --git a/tools/otsdb-dump-restore/build-tools/setup-devel-user.sh b/tools/otsdb-dump-restore/build-tools/setup-devel-user.sh index 2d8e8a62c49..ca773c43384 100755 --- a/tools/otsdb-dump-restore/build-tools/setup-devel-user.sh +++ b/tools/otsdb-dump-restore/build-tools/setup-devel-user.sh @@ -25,4 +25,4 @@ chown "${DEVEL_UID}:${DEVEL_GID}" \ /kilda \ /usr/local \ /usr/local/bin \ - /usr/local/lib/python3.10/site-packages + /usr/local/lib/python3.11/site-packages diff --git a/tools/otsdb-dump-restore/otsdb-to-vm.sh b/tools/otsdb-dump-restore/otsdb-to-vm.sh index 0be9e05a86c..cca9b23dacf 100755 --- a/tools/otsdb-dump-restore/otsdb-to-vm.sh +++ b/tools/otsdb-dump-restore/otsdb-to-vm.sh @@ -83,7 +83,6 @@ function increment_date() eval $__resultvar=$(date -d "${start_date} ${increment}" +${interval_format}) } - # Loop through dates while [[ "$start_date" < "$end_date" ]]; do # trim : from date diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py index 37864e3a3b0..030505eab02 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py @@ -62,29 +62,23 @@ def main(opentsdb_endpoint, time_start, **options): dump_dir.mkdir(exist_ok=True, parents=True) dump_frame = _TimeFrame(time_start, time_stop) - rest_statistics = utils.RestStatistics() - statistics = _DumpStatistics(rest_statistics) - http_session = requests.Session() - http_session.hooks['response'].append(utils.ResponseStatisticsHook(rest_statistics)) - - retries = Retry(total=3, status_forcelist=[424]) - http_session.mount('http://', HTTPAdapter(max_retries=retries)) - client = stats_client.OpenTSDBStatsClient(http_session, opentsdb_endpoint) all_metrics_iterator = stats_client.OpenTSDBMetricsList(http_session, opentsdb_endpoint, prefix=prefix) - concurrent_dump(all_metrics_iterator, statistics, client, dump_frame, - dump_dir, query_frame_size, need_remove_meta, concurrent_executors) + print('Starting dumping metrics from {} to {}'.format(time_start, time_stop)) + + concurrent_dump(all_metrics_iterator, dump_frame, + dump_dir, query_frame_size, need_remove_meta, concurrent_executors, 
opentsdb_endpoint) -def concurrent_dump(all_metrics_iterator, statistics, client, dump_frame, dump_dir, - query_frame_size, need_remove_meta, concurrent_executors): +def concurrent_dump(all_metrics_iterator, dump_frame, dump_dir, + query_frame_size, need_remove_meta, concurrent_executors, opentsdb_endpoint): with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_executors) as executor: futures = [] for metric in all_metrics_iterator: - futures.append(executor.submit(dump, statistics, client, dump_frame, - dump_dir, metric, query_frame_size, need_remove_meta)) + futures.append(executor.submit(dump, dump_frame, + dump_dir, metric, query_frame_size, need_remove_meta, opentsdb_endpoint)) exit_result = 0 for future in concurrent.futures.as_completed(futures): @@ -94,8 +88,19 @@ def concurrent_dump(all_metrics_iterator, statistics, client, dump_frame, dump_d exit(exit_result) -def dump(statistics, client, dump_frame, dump_location, metric_name, query_frame_size, need_remove_meta): +def dump(dump_frame, dump_location, metric_name, query_frame_size, need_remove_meta, opentsdb_endpoint): try: + rest_statistics = utils.RestStatistics() + statistics = _DumpStatistics(rest_statistics) + + http_session = requests.Session() + http_session.hooks['response'].append(utils.ResponseStatisticsHook(rest_statistics)) + + retries = Retry(total=3, status_forcelist=[424]) + http_session.mount('http://', HTTPAdapter(max_retries=retries)) + + client = stats_client.OpenTSDBStatsClient(http_session, opentsdb_endpoint) + meta = _DumpMetadata(metric_name, dump_location) try: @@ -129,10 +134,14 @@ def dump(statistics, client, dump_frame, dump_location, metric_name, query_frame print('Failed to dump metric "{}": {}'.format(metric_name, e)) return 1 + finally: + client.close() + def _dump_stream(stream, target, meta, status_report, statistics): writer = mapping.get_csv_writer(target) for frame, stats_entries in stream: + # This flush doesn't work fine with concurrency, so we don't use it. 
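+        # (several dump() workers may be running this code at the same time, so
+        # sharing a single console progress line would interleave their output)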
# status_report.flush() for entry in stats_entries: statistics.add_entry(frame, entry) @@ -151,7 +160,6 @@ def build_time_stream(start, end, step): stream = frame_stream(stream) stream = frame_overlap_fix_stream( stream, end_offset=datetime.timedelta(seconds=-1)) - for frame_start, frame_end in stream: yield factory.produce(frame_start, frame_end) @@ -210,25 +218,13 @@ def stats_stream(stream, client, metric, query_manager): for entry in batches: if entry.aggregate_tags: query_manager.schedule( - _extract_stats_batch_time_frame(entry, query_data.frame), + query_data.frame, entry.aggregate_tags) continue yield query_data.frame, stats_client.batch_to_entries(entry) -def _extract_stats_batch_time_frame(batch, fallback): - if not batch.values: - return fallback - start = batch.values[0].timestamp - end = batch.values[-1].timestamp - if start == end: - end = start + datetime.timedelta(seconds=1) - - return _TimeFrame( - start, end, step_number=fallback.step_number) - - class _DumpStatistics: def __init__(self, rest_statistics): self.rest = rest_statistics diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/mapping.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/mapping.py index 2d6293d1ce6..3f8a7287bf1 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/mapping.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/mapping.py @@ -33,7 +33,7 @@ def decode_raw_cvs_row(raw): timestamp = float(row[0]) metric_name = row[1] tags = ast.literal_eval(row[2]) - value = int(row[3]) + value = float(row[3]) return stats_client.StatsEntry( utils.unixtime_to_datetime(timestamp), metric_name, value, tags=tags) diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/report.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/report.py index 32437e92009..e310f48124e 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/report.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/report.py @@ -47,11 +47,5 @@ def close(self): self.stream.write('\n') self.stream.flush() - # def get_last_message(self): - # chunks = self._format_message() - # chunks = (str(x) for x in chunks if x is not None) - # chunks = (str(x) for x in chunks if x) - # return ''.join(chunks) - def _format_message(self): raise NotImplementedError() diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/stats_client.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/stats_client.py index 60b577aeb8a..fd99b4ee27c 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/stats_client.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/stats_client.py @@ -13,6 +13,9 @@ def __init__(self, http_session, endpoint): self._http_session = http_session self._url_factory = utils.HttpUrlFactory(endpoint) + def close(self): + self._http_session.close() + class VictoriaMetricsStatsClient(_StatsClientBase): def query_range(self, start, end, metric_name, tags=None, is_rate=False): @@ -108,6 +111,7 @@ def query_range(self, start, end, metric_name, tags=None, is_rate=False): start=self._format_time_query_arg(start), end=self._format_time_query_arg(end), m=self._build_query(metric_name, tags, agg_func))) + response.raise_for_status() return self._parse_query_response(response.json()) From ee544d74cddd8cadb9c990b9cf2fb20b22486d53 Mon Sep 17 00:00:00 2001 From: Pablo Murillo Date: Tue, 11 Jul 2023 10:14:54 +0200 Subject: [PATCH 16/27] normalize dates, add error 500 to retries --- tools/otsdb-dump-restore/otsdb-to-vm.sh | 4 ++++ 
tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/otsdb-dump-restore/otsdb-to-vm.sh b/tools/otsdb-dump-restore/otsdb-to-vm.sh index cca9b23dacf..d256df47ad1 100755 --- a/tools/otsdb-dump-restore/otsdb-to-vm.sh +++ b/tools/otsdb-dump-restore/otsdb-to-vm.sh @@ -67,6 +67,10 @@ if [[ "$(docker images -q kilda-otsdb-dump-restore 2> /dev/null)" == "" ]]; then exit 1 fi +# Convert start and end date to interval format +start_date="$(gdate -d "${start_date}" +${interval_format})" +end_date="$(gdate -d "${end_date}" +${interval_format})" + # Define function to dump data from OpenTSDB function dump_data { docker run --rm --network="host" -v "opentsdb-data-${5}":/tmp kilda-otsdb-dump-restore kilda-otsdb-dump --query-frame-size "${7}" --concurrent "${6}" --metrics-prefix "${2}" --time-stop "${3}" "${4}" "${1}" diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py index 030505eab02..0eefb679f4a 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py @@ -96,7 +96,7 @@ def dump(dump_frame, dump_location, metric_name, query_frame_size, need_remove_m http_session = requests.Session() http_session.hooks['response'].append(utils.ResponseStatisticsHook(rest_statistics)) - retries = Retry(total=3, status_forcelist=[424]) + retries = Retry(total=3, status_forcelist=[424, 500]) http_session.mount('http://', HTTPAdapter(max_retries=retries)) client = stats_client.OpenTSDBStatsClient(http_session, opentsdb_endpoint) From 5fa9983a2933a4dd080966c5b8659a9337463d8d Mon Sep 17 00:00:00 2001 From: Pablo Murillo Date: Wed, 12 Jul 2023 12:11:58 +0200 Subject: [PATCH 17/27] add an option to use gdate if is mac, date if not. 
add 1048576 size limit for the restore requests --- tools/otsdb-dump-restore/otsdb-to-vm.sh | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/otsdb-dump-restore/otsdb-to-vm.sh b/tools/otsdb-dump-restore/otsdb-to-vm.sh index d256df47ad1..f715acfbf0e 100755 --- a/tools/otsdb-dump-restore/otsdb-to-vm.sh +++ b/tools/otsdb-dump-restore/otsdb-to-vm.sh @@ -67,9 +67,16 @@ if [[ "$(docker images -q kilda-otsdb-dump-restore 2> /dev/null)" == "" ]]; then exit 1 fi +# Use 'gdate' command if it is MacOS or 'date' otherwise +if [[ "$(uname)" == "Darwin" ]]; then + date="gdate" +else + date="date" +fi + # Convert start and end date to interval format -start_date="$(gdate -d "${start_date}" +${interval_format})" -end_date="$(gdate -d "${end_date}" +${interval_format})" +start_date="$($date -d "${start_date}" +${interval_format})" +end_date="$($date -d "${end_date}" +${interval_format})" # Define function to dump data from OpenTSDB function dump_data { @@ -78,13 +85,13 @@ function dump_data { # Define function to restore data to Victoria Metrics function restore_data { - docker run --rm --network="host" -v "opentsdb-data-${2}":/tmp kilda-otsdb-dump-restore kilda-otsdb-restore "${1}" && docker volume rm "opentsdb-data-${2}" || echo "Failed to restore data to Victoria Metrics" >&2 + docker run --rm --network="host" -v "opentsdb-data-${2}":/tmp kilda-otsdb-dump-restore kilda-otsdb-restore --request-size-limit 1048576 "${1}" && docker volume rm "opentsdb-data-${2}" || echo "Failed to restore data to Victoria Metrics" >&2 } function increment_date() { local __resultvar=$1 - eval $__resultvar=$(date -d "${start_date} ${increment}" +${interval_format}) + eval $__resultvar=$($date -d "${start_date} ${increment}" +${interval_format}) } # Loop through dates @@ -101,7 +108,7 @@ while [[ "$start_date" < "$end_date" ]]; do dump_data "${start_date}" "${metrics_prefix}" "${interval_end_date}" "${opentsdb_endpoint}" "${volume_subfix}" "${concurrent_jobs}" "${query_frame_size}" echo "Restoring data from ${start_date} to ${interval_end_date} in background" - # restore_data "${victoria_metrics_endpoint}" "${volume_subfix}" & + restore_data "${victoria_metrics_endpoint}" "${volume_subfix}" & # Increment date by time interval increment_date start_date From 8104f04cf0b5a33e3c908b23774e8e43c48f8cb8 Mon Sep 17 00:00:00 2001 From: Pablo Murillo Date: Thu, 13 Jul 2023 16:16:54 +0200 Subject: [PATCH 18/27] increase time between retries --- tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py index 0eefb679f4a..6f543d3325e 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/dump.py @@ -96,7 +96,7 @@ def dump(dump_frame, dump_location, metric_name, query_frame_size, need_remove_m http_session = requests.Session() http_session.hooks['response'].append(utils.ResponseStatisticsHook(rest_statistics)) - retries = Retry(total=3, status_forcelist=[424, 500]) + retries = Retry(total=5, backoff_factor=1, status_forcelist=[424, 500]) http_session.mount('http://', HTTPAdapter(max_retries=retries)) client = stats_client.OpenTSDBStatsClient(http_session, opentsdb_endpoint) From 46701c55182833965efd78ae6b77cd2635f89860 Mon Sep 17 00:00:00 2001 From: Pablo Murillo Date: Wed, 13 Sep 2023 14:19:33 +0200 Subject: [PATCH 19/27] modify 
aggregator funtion to use none --- .../src/kilda/tsdb_dump_restore/constants.py | 2 ++ .../src/kilda/tsdb_dump_restore/stats_client.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/constants.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/constants.py index abeffa62f4f..f325a28b7a5 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/constants.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/constants.py @@ -4,3 +4,5 @@ METRIC_NAME_FIELD = 'metric_name' TAGS_FIELD = 'tags' VALUE_FIELD = 'value' + +AGGREGATE_FUNCTION = 'none' diff --git a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/stats_client.py b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/stats_client.py index fd99b4ee27c..cfb3dc68e66 100644 --- a/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/stats_client.py +++ b/tools/otsdb-dump-restore/src/kilda/tsdb_dump_restore/stats_client.py @@ -6,6 +6,7 @@ import string from kilda.tsdb_dump_restore import utils +from kilda.tsdb_dump_restore.constants import AGGREGATE_FUNCTION class _StatsClientBase: @@ -101,7 +102,7 @@ def query_range(self, start, end, metric_name, tags=None, is_rate=False): if not tags: tags = dict() - agg_func = 'max' + agg_func = AGGREGATE_FUNCTION if is_rate: agg_func = 'rate' From 2605b0e4a91aa88017b31953702ef1fb75927b64 Mon Sep 17 00:00:00 2001 From: pidfork <203948092347@pidfork.com> Date: Fri, 22 Sep 2023 11:13:17 +0000 Subject: [PATCH 20/27] dump shell script improvement. Main changes: Dump and Upload process are running independenlty. Additionally, the Dump process is checking if there is enough disk space for dumping and will pause until the Upload process will frees up some space --- tools/otsdb-dump-restore/README.md | 54 ++- tools/otsdb-dump-restore/otsdb-to-vm.sh | 583 +++++++++++++++++++++--- 2 files changed, 541 insertions(+), 96 deletions(-) diff --git a/tools/otsdb-dump-restore/README.md b/tools/otsdb-dump-restore/README.md index cadaa83f969..4db83082479 100644 --- a/tools/otsdb-dump-restore/README.md +++ b/tools/otsdb-dump-restore/README.md @@ -110,25 +110,37 @@ docker run --rm -v opentsdb-data:/tmp kilda-otsdb-dump-restore kilda-otsdb-resto docker volume remove opentsdb-data ``` -### How to use the otsdb-to-vm script -The otsdb-to-vm script is a wrapper around the kilda-otsdb-dump-restore tool. It is used to dump data from an OpenTSDB and restore it to a VictoriaMetrics service. -```bash -Usage: otsdb-to-vm OPENTSDB_ENDPOINT VICTORIA_ENDPOINT TIME_START TIME_STOP [hour|day] - - This tool dumps the data from an OpenTSDB and restore it to a VictoriaMetrics service. - - OPENTSDB_ENDPOINT openTSDB endpoint - - VICTORIA_ENDPOINT VictoriaMetrics endpoint - - TIME_START time since the data is dumped - - DATE_STOP time where to stop dumping - - [hour|day] time frame size +### How to use the `otsdb-to-vm.sh` script +The `otsdb-to-vm.sh` script is a bash wrapper around the `kilda-otsdb-dump-restore` tool. It is used to dump data from an OpenTSDB and restore it to a VictoriaMetrics service. +Options `--opentsdb_endpoint`, `--victoria_metrics_endpoint`, `--start_date` and `--end_date` are **mandatory**. - Examples: - - ./otsdb-to-vm.sh opentsdb.example.com:4242 victoria-metrics.example.com:4242 2022-01-01 2022-01-31 kilda. day - ./otsdb-to-vm.sh opentsdb.example.com:4242 victoria-metrics.example.com:4242 2022-01-01T00:00:00 2022-01-01T23:59:59 kilda. 
hour
-```
+
+### Usage examples:
+```bash
+# pump data using one-day batches
+./otsdb-to-vm.sh -s http://opentsdb.example.com:4242 -d http://victoria-metrics.example.com:4242 --start_date 2022-01-01 --end_date 2022-01-31 --metrics_prefix 'kilda.' --interval day
+
+# pump data using one-hour batches
+./otsdb-to-vm.sh -s http://opentsdb.example.com:4242 -d http://victoria-metrics.example.com:4242 --start_date 2022-01-01T00:00:00 --end_date 2022-01-01T23:59:59 --metrics_prefix 'kilda.' --interval hour
+```
+
+### How it works:
+Two processes run simultaneously:
+* The `dump` process (foreground) is responsible for saving **batches** of data from OpenTSDB to the specified folder;
+* The `restore` process (background) is tasked with uploading the dumped data to the remote Victoria Metrics server.
+#
+The `dump` process runs batches in a loop, continually checking that enough free space/inodes are available to perform a dump. If it detects a lack of space, the dump cycle waits until space becomes available. Once the dump successfully finishes the current batch, the folder is renamed to indicate it is ready for upload.
+
+1) When the `dump` process completes all batches, it sends a `SIGUSR1` signal to the `restore` process and waits for it to finish.
+2) Pressing `Ctrl+C` once increments an internal counter but doesn't take any immediate action.
+3) Pressing `Ctrl+C` twice stops the `dump` process; it then waits for all background `restore` tasks to finish before terminating.
+4) Pressing `Ctrl+C` three or more times sends the stop signal `SIGTERM` to the background process, causing it to stop execution after completing the current upload task.
+#
+The `restore` process runs in the background, continuously monitoring for folders named with a 'ready' state marker and logging its execution events to the 'background_log' file.
+
+1) During each loop cycle, the `restore` process handles only one folder.
+2) If there are no new folders to process, it suspends execution and waits for a new folder to become available for upload (indicated by the marker in its name).
+3) After a successful restore operation, the folder is removed, freeing up space for use by the `dump` process.
+4) The `restore` process is prepared to capture a `Ctrl+C` or `SIGTERM` signal and terminate its loop execution after completing the current upload task.
+5) The `restore` process also listens for a `SIGUSR1` signal to gracefully finish its loop execution.
+6) Upon receiving the `SIGUSR1` signal, the `restore` process assumes that the `dump` loop has finished. It processes and removes all remaining ready folders and then terminates its execution.
+
+A minimal commented sketch of this signal-based coordination is included at the top of the script below.
+#
diff --git a/tools/otsdb-dump-restore/otsdb-to-vm.sh b/tools/otsdb-dump-restore/otsdb-to-vm.sh
index f715acfbf0e..6155d5b8b58 100755
--- a/tools/otsdb-dump-restore/otsdb-to-vm.sh
+++ b/tools/otsdb-dump-restore/otsdb-to-vm.sh
@@ -1,117 +1,550 @@
 #!/bin/bash
-# ./otsdb-to-vm.sh opentsdb.example.com:4242 victoria-metrics.example.com:4242 2022-01-01 2022-01-31 kilda. day
-# This command will migrate data from OpenTSDB running at "opentsdb.example.com:4242"
-#to Victoria Metrics running at "victoria-metrics.example.com:4242" for the time period
-#between January 1, 2022 and January 31, 2022, using the metrics prefix "my-metrics-prefix"
-#and a time interval of one day. You can customize the command by replacing the parameters
-#with your own values.
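+# ------------------------------------------------------------------------------
+# Illustrative sketch only (not the actual implementation in this script): the
+# foreground dump loop and the background restore loop coordinate roughly like
+# this. restore_loop, batches_remain, dump_one_batch and folder are hypothetical
+# names; background_log and ready_suffix are the script's own variables.
+#
+#   restore_loop &                    # background uploader, logs to ${background_log}
+#   restore_pid=$!
+#   trap 'interrupts=$((interrupts + 1))' INT    # count Ctrl+C presses
+#   while batches_remain; do
+#     dump_one_batch && mv "${folder}" "${folder}${ready_suffix}"  # mark batch ready
+#   done
+#   kill -USR1 "${restore_pid}"       # tell the uploader no more batches are coming
+#   wait "${restore_pid}"             # let the remaining uploads finish
+# ------------------------------------------------------------------------------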
+# shellcheck enable=require-variable-braces -# Check for required parameters -if [[ $# -lt 4 ]]; then - echo "Usage: $0 OPENTSDB_ENDPOINT VICTORIA_ENDPOINT TIME_START TIME_STOP [hour|day] CONCURRENT_JOBS QUERY_FRAME_SIZE +# +# Script for migration data from OpenTSDB to Victoria metrics +# there are two processes: +# dump - which is saving batches of data from OpenTSDB to the specified folder +# restore - which is uploading data dumped data to the remote Victoria metrics server +# dump process running batches in the loop +# every time it checking if there are enogh of space/inodes to make a dump +# if lack of space detected - dump loop will wait for a free space +# restore process running in the background and writing execution process to the log file 'background_log' +# every loop only one folder is processed +# if there is no new folder to process it will suspend execution and wait for a new folder to restore +# after successfull restore - dump batch folder will be removed and free space can be used by dump process +# once dump process will finish all batches it will send SIGUSR1 signal to the restore process, and will wait until restore process will be finished +# on SIGUSR1 signal restore process will understand whel all folders will be processed and removed - it will finish execution. +# Ctrl+C pressed once doesn't do something, just increase the counter +# Pressed twice - will stop dump process. process will wait for a finish background restore task +# Pressed third time and more will send stop signal to background process +# - This tool dumps the data from an OpenTSDB and restore it to a VictoriaMetrics service. +# enable job control +set -m - OPENTSDB_ENDPOINT openTSDB endpoint +# ============================================================================== +# set defaults +interval="day" +interval_format="%Y-%m-%d" +concurrent_jobs_limit="50" +concurrent_jobs="1" +query_frame_size="180" +dump_folder_prefix="opentsdb-data-" +dump_storage_folder="/vm_migration_volume" +default_docker_image_name='kilda-otsdb-dump-restore' +ready_suffix='_dump_done' +# if file exist - background process will continue execution +marker_for_execution="${marker_for_execution:-/tmp/kilda-otsdb-dump-restore-marker}" +background_log="${background_log:-/tmp/kilda-otsdb-dump-restore.log}" +# interval (sec) for upload loop to wait new ready folder with data to process +delay_between_checks_new_folder="${delay_between_checks_new_folder:-10}" +# under min_inodes limit dump will wait to freeup inodes (value depends on your situation) +min_inodes="${min_inodes:-1000}" +# under min_free_space (bytes) dump will wait to freeup space (value depends on size of your dumps) +min_free_space="${min_free_space:-100000000}" +# timeout to get info from src endpoint (used by curl which is checking mertics presence) +src_timeout="10" +# ============================================================================== +# shellcheck disable=SC2155 +readonly _c_none_=$(printf '\e[39m') +# shellcheck disable=SC2155 +readonly _c_red_=$(printf '\e[31m') +# shellcheck disable=SC2155 +readonly _c_green_=$(printf '\e[32m') +# shellcheck disable=SC2155 +readonly _c_yellow_=$(printf '\e[33m') +# shellcheck disable=SC2155 +readonly _c_blue_=$(printf '\e[34m') +# shellcheck disable=SC2155 +readonly _c_magenta_=$(printf '\e[35m') - VICTORIA_ENDPOINT VictoriaMetrics endpoint +# ============================================================================== +function log() { + local l_type="${1^^}" + local l_value + case "${l_type}" in + OK) + 
l_value="${_c_green_}[${l_type}]${_c_none_}" + ;; + INFO) + l_value="${_c_blue_}[${l_type}]${_c_none_}" + ;; + WARNING) + l_value="${_c_yellow_}[${l_type}]${_c_none_}" + ;; + ERROR|FAIL) + l_value="${_c_red_}[${l_type}]${_c_none_}" + ;; + *) + # magenta for unknown + l_value="${_c_magenta_}[${l_type}]${_c_none_}" + ;; + esac + # print params starting from the second + echo -e "${l_value} $("${date_cmd}" "+%F %T") ${*:2}" +} + +# ============================================================================== +# shellcheck disable=SC2155 +readonly date_cmd="$(which date)" +# shellcheck disable=SC2155 +readonly bc_cmd="$(which bc)" - TIME_START time since the data is dumped +if [ -z "${date_cmd}" ] ; then + log "ERROR" "date command not found" >&2 + exit 1 +fi - DATE_STOP time where to stop dumping +if [ -z "${bc_cmd}" ] ; then + log "ERROR" "bc command not found" >&2 + exit 1 +fi - [hour|day] time frame size to dump data. Default is day. +# ============================================================================== +function help() { + log "INFO" "Allowed options are:" + log "INFO" " -s|--source|--opentsdb_endpoint" + log "INFO" " openTSDB endpoint" + log "INFO" " -d|--destination|--victoria_metrics_endpoint" + log "INFO" " victoria metrics endpoint" + log "INFO" " -sd|--start_date time since the data is dumped" + log "INFO" " -ed|--end_date time where to stop dumping" + log "INFO" " -m|--metrics_prefix prefix for exported metrics" + log "INFO" " -i|--interval time frame size to dump data. Default is day." + log "INFO" " -c|--concurrent_jobs number of concurrent jobs to run. Default is 1" + log "INFO" " -q|--query_frame_size number of data points to query from OpenTSDB" + log "INFO" " at once. Default is 180." + log "INFO" " -dfp|--dump_folder_prefix prefix string used for a folder name" + log "INFO" " -dsf|--dump_storage_folder path string to storage where the dump folders will be stored" + log "INFO" " -sbl|--show_background_log option used to display tail on the background log" + log "INFO" "Examples:" + log "INFO" " ./otsdb-to-vm.sh -s http://opentsdb.example.com:4242 -d http://victoria-metrics.example.com:4242 --start_date 2022-01-01 --end_date 2022-01-31 --metrics_prefix 'kilda.' --interval day" + log "INFO" " ./otsdb-to-vm.sh -s http://opentsdb.example.com:4242 -d http://victoria-metrics.example.com:4242 --start_date 2022-01-01T00:00:00 --end_date 2022-01-01T23:59:59 --metrics_prefix 'kilda.' --interval hour" +} - CONCURRENT_JOBS number of concurrent jobs to run. Default is 1. +# ============================================================================== +function check_metrics_exist() { + local l_opentsdb_endpoint="${1}" + local l_metrics_prefix="${2}" + local l_json + l_json=$(curl -m"${src_timeout}" -s "${l_opentsdb_endpoint}/api/suggest?type=metrics&q=${l_metrics_prefix}&max=150" 2>/dev/null) + local l_exitcode="${?}" + if [ "${l_exitcode}" -ne "0" ] ; then + log "ERROR" "curl returned \"${_c_red_}${l_exitcode}${_c_none_}\" error." >&2 + log "ERROR" "check man page with: ${_c_yellow_}man --pager='less -p \"^EXIT CODES\"' curl${_c_none_}" + return 1 + fi + # I heard about jq but ... not present everywhere by default + # remove spaces and newlines then remove square brackets from begin and end of json array then replace commas with newlines + local l_result + l_result=$(tr -d ' \n' <<<"${l_json}" | sed 's/^\[\|\]$//g;s/","/"\n"/g' | sed 's/^"\|"$//g') #' + # if result isn't empty - success. 
mertics exist + if [ -z "${l_result}" ] ; then + return 1 + else + log "INFO" "\"$(wc -l <<<"${l_result}")\" metrics was/were found" + return 0 + fi +} - QUERY_FRAME_SIZE number of data points to query from OpenTSDB at once. Default is 180. +# ============================================================================== +while [ "${#}" -gt "0" ] ; do + key="${1}" + case "${key}" in + -s|--source|--opentsdb_endpoint) + opentsdb_endpoint="${2}" + shift # remove key + shift # remove value + ;; - Examples: + -d|--destination|--victoria_metrics_endpoint) + victoria_metrics_endpoint="${2}" + shift # remove key + shift # remove value + ;; - ./otsdb-to-vm.sh opentsdb.example.com:4242 victoria-metrics.example.com:4242 2022-01-01 2022-01-31 kilda. day - ./otsdb-to-vm.sh opentsdb.example.com:4242 victoria-metrics.example.com:4242 2022-01-01T00:00:00 2022-01-01T23:59:59 kilda. hour" - exit 1 -fi + -sd|--start_date) + start_date="${2}" + shift # remove key + shift # remove value + ;; + -ed|--end_date) + end_date="${2}" + shift # remove key + shift # remove value + ;; -# Set parameters -opentsdb_endpoint="$1" -victoria_metrics_endpoint="$2" -start_date="$3" -end_date="$4" -metrics_prefix="$5" -interval="${6:-day}" -concurrent_jobs="${7:-1}" -query_frame_size="${8:-180}" - -# Set time interval -case $interval in - hour) - interval_format="%Y-%m-%dT%H:00:00" - increment="1 hour" + -m|--metrics_prefix) + # check if after prefix we have another option + if [[ "${2}" =~ ^(-.|--).+$ ]] ; then + metrics_prefix="" + shift # remove key + else + metrics_prefix="${2}" + shift # remove key + shift # remove value + fi ;; - day) - interval_format="%Y-%m-%d" - increment="1 days" + + -i|--interval) + interval="${2,,}" + # set time interval + if [ "${interval}" == "hour" ] ; then + interval_format="%Y-%m-%dT%H:00:00" + increment="1 hour" + elif [ "${interval}" == "day" ] ; then + interval_format="%Y-%m-%d" + increment="1 days" + else + log "ERROR" "Invalid interval: \"${interval}\"" >&2 + log "ERROR" "Allowed interval: day|hour" >&2 + exit 1 + fi + shift # remove key + shift # remove value ;; - *) - echo "Invalid interval: $interval" - exit 1 + + -c|--concurrent_jobs) + concurrent_jobs="${2}" + if [[ ! "${concurrent_jobs}" =~ ^[0-9]+$ ]] ; then + log "ERROR" "Concurency value should be a number [1..${concurrent_jobs_limit}]" >&2 + exit 1 + fi + if [ "${concurrent_jobs}" -lt "1" ] || [ "${concurrent_jobs}" -gt "${concurrent_jobs_limit}" ] ; then + log "ERROR" "Concurency value \"${concurrent_jobs}\" is out of range [1..${concurrent_jobs_limit}]" >&2 + exit 1 + fi + shift # remove key + shift # remove value ;; -esac + -q|--query_frame_size) + query_frame_size="${2}" + shift # remove key + shift # remove value + ;; + + -dfp|--dump_folder_prefix) + dump_folder_prefix="${2}" + shift # remove key + shift # remove value + ;; + + -dsf|--dump_storage_folder) + dump_storage_folder=$(realpath "${2}") + exitcode="${?}" + if [ "${exitcode}" -ne "0" ] ; then + log "ERROR" "Can't resolv real path for \"${2}\". Abort." >&2 + exit 1 + fi + shift # remove key + shift # remove value + ;; + + -sbl|--show_background_log) + show_background_log="yes" + shift + ;; + *) + log "ERROR" "Unknown option \"${key}\"" >&2 + help + exit 1 + ;; + esac +done -if [[ "$(docker images -q kilda-otsdb-dump-restore 2> /dev/null)" == "" ]]; then - echo "Docker image kilda-otsdb-dump-restore not found. Please build it first." 
>&2 - exit 1 +# check if mandatory vars are all defined +while read -r var_name ; do + if [ -z "${!var_name+x}" ] ; then + log "ERROR" "You have to define --${var_name} option" >&2 + _error_="1" + fi +done < <(echo -e "opentsdb_endpoint\nvictoria_metrics_endpoint\nstart_date\nend_date") +# exit if there was an error +if [ -n "${_error_}" ] ; then + help + exit 1 fi -# Use 'gdate' command if it is MacOS or 'date' otherwise -if [[ "$(uname)" == "Darwin" ]]; then - date="gdate" -else - date="date" +# check if dump storage folder exist +if [ ! -d "${dump_storage_folder}" ] ; then + log "ERROR" "Folder \"${dump_storage_folder}\" for saving dumps must exist. Need to create or define with option --dump_storage_folder" >&2 + exit 1 fi -# Convert start and end date to interval format -start_date="$($date -d "${start_date}" +${interval_format})" -end_date="$($date -d "${end_date}" +${interval_format})" +# check if docker installed and you have access +result=$(docker --version 2>&1) +exitcode="${?}" +if [ "${exitcode}" -ne "0" ] ; then + log "ERROR" "Can't execute docker command. Docker may be not installed or user \"${_c_yellow_}$(whoami)${_c_none_}\" require access to the docker socket." >&2 + log "ERROR" "${result}" >&2 + exit 1 +fi +if [ -z "${DEBUG}" ] ; then +# Check if image exist +if [ -z "$(docker images -q "${default_docker_image_name}" 2> /dev/null)" ]; then + log "ERROR" "Docker image \"${_c_yellow_}${default_docker_image_name}${_c_none_}\" not found. Please build it first." >&2 + exit 1 +fi +fi + +# check if we have metrics to dump +if ! check_metrics_exist "${opentsdb_endpoint}" "${metrics_prefix}" ; then + log "ERROR" "No metrics found by prefix \"${_c_yellow_}${metrics_prefix}${_c_none_}\" at \"${_c_magenta_}${opentsdb_endpoint}${_c_none_}\"" + exit 1 +fi + +# convert start and end date to interval format +start_date="$(${date_cmd} -d "${start_date}" +${interval_format})" +end_date="$(${date_cmd} -d "${end_date}" +${interval_format})" + +# ============================================================================== # Define function to dump data from OpenTSDB function dump_data { - docker run --rm --network="host" -v "opentsdb-data-${5}":/tmp kilda-otsdb-dump-restore kilda-otsdb-dump --query-frame-size "${7}" --concurrent "${6}" --metrics-prefix "${2}" --time-stop "${3}" "${4}" "${1}" + local l_start_date="${1}" + local l_metrics_prefix="${2}" + local l_interval_end_date="${3}" + local l_opentsdb_endpoint="${4}" + local l_concurrent_jobs="${5}" + local l_query_frame_size="${6}" + local l_volume_suffix l_local_bind_folder + # replace `-` and `T` to `.` and `_` for a filename's suffix + l_volume_suffix=$(sed 'y/-T/._/' <<< "${l_start_date}_${l_interval_end_date}") + l_local_bind_folder="${dump_storage_folder}/${dump_folder_prefix}${l_volume_suffix}" + if [ -e "${l_local_bind_folder}${ready_suffix}" ] ; then + log "ERROR" "Folder \"${l_local_bind_folder}${ready_suffix}\" exist already. Not required to dump again." >&2 + return 1 + fi + log "INFO" "Dumping data since \"${l_start_date}\" till \"${l_interval_end_date}\" to folder \"${l_local_bind_folder}\"" + # Create dump folder for this iteration + if [ ! -d "${l_local_bind_folder}" ] ; then + mkdir -p "${l_local_bind_folder}" + else + log "WARNING" "Folder \"${l_local_bind_folder}\" exist already. Skip creation." 
>&2 + fi +if [ -z "${DEBUG}" ] ; then + docker run --rm --network="host" \ + -v "${l_local_bind_folder}":/tmp \ + "${default_docker_image_name}" \ + kilda-otsdb-dump \ + --query-frame-size "${l_query_frame_size}" \ + --concurrent "${l_concurrent_jobs}" \ + --metrics-prefix "${l_metrics_prefix}" \ + --time-stop "${l_interval_end_date}" \ + "${l_opentsdb_endpoint}" "${l_start_date}" +else + delay=2 + log "DEBUG" "Sleep ${delay} in dump" >&2 + sleep "${delay}" +fi + local exicode="${?}" + if [ "${exicode}" -ne "0" ] ; then + log "ERROR" "Failed to dump data into \"${_c_yellow_}${l_local_bind_folder}${_c_none_}\"" >&2 + log "ERROR" "Removing folder \"${_c_yellow_}${l_local_bind_folder}${_c_none_}\"" >&2 + [ -d "${l_local_bind_folder}" ] && rm -rf "${l_local_bind_folder}" + log "ERROR" "Lost interval \"${l_start_date}\" till \"${l_interval_end_date}\" to folder \"${l_local_bind_folder}\"" >&2 + else + log "INFO" "Rename dumped folder to inform restore process for upload." + log "INFO" "\"${l_local_bind_folder}\" to \"${l_local_bind_folder}${ready_suffix}\"" >&2 + if [ -e "${l_local_bind_folder}${ready_suffix}" ] ; then + log "ERROR" "Folder \"${l_local_bind_folder}${ready_suffix}\" exist already!" >&2 + log "ERROR" "Leave folder \"${l_local_bind_folder}\" as it is. Check it manually." >&2 + exitcode=1 + else + mv "${l_local_bind_folder}" "${l_local_bind_folder}${ready_suffix}" + fi + fi + return "${exicode}" } +# ============================================================================== # Define function to restore data to Victoria Metrics function restore_data { - docker run --rm --network="host" -v "opentsdb-data-${2}":/tmp kilda-otsdb-dump-restore kilda-otsdb-restore --request-size-limit 1048576 "${1}" && docker volume rm "opentsdb-data-${2}" || echo "Failed to restore data to Victoria Metrics" >&2 + local l_victoria_metrics_endpoint="${1}" + local l_local_bind_folder="${2}" + log "INFO" "Restoring data from \"${l_local_bind_folder}\" to \"${l_victoria_metrics_endpoint}\"" >> "${background_log}" +if [ -z "${DEBUG}" ] ; then + docker run --rm --network="host" \ + -v "${l_local_bind_folder}":/tmp \ + "${default_docker_image_name}" \ + kilda-otsdb-restore --request-size-limit 1048576 "${l_victoria_metrics_endpoint}" +else + delay=5 + log "DEBUG" "Sleep ${delay} in restore" >> "${background_log}" + sleep "${delay}" +fi + local exicode="${?}" + if [ "${exicode}" -ne "0" ] ; then + log "ERROR" "Failed to restore data to Victoria Metrics" >> "${background_log}" + else + { + log "INFO" "Restore for \"${l_local_bind_folder}\" successfully finished." 
+ log "INFO" "Remove folder \"${_c_yellow_}${l_local_bind_folder}${_c_none_}\"" + rm -rf "${l_local_bind_folder}" + exitcode="${?}" + [ "${exitcode}" -ne "0" ] && log "ERROR" "Failed to remove folder \"${l_local_bind_folder}\"" + } >> "${background_log}" + fi + return "${exicode}" +} + + +# ============================================================================== setup Ctrl+C handler for a restore +function exit_restoreloop() { + # increment keypress counter + ((stop_restore_loop++)) + { + # newline + echo + log "WARNING" "Ctrl+C catched \"${stop_restore_loop}\" time(s)" + log "WARNING" "Finishing restore loop" + } >> "${background_log}" } -function increment_date() -{ - local __resultvar=$1 - eval $__resultvar=$($date -d "${start_date} ${increment}" +${interval_format}) +# ============================================================================== restore loop function +function restore_loop() { + stop_restore_loop=0 + dump_was_finished=0 + # setup trap handler for a background process + trap exit_restoreloop SIGINT + # setup trap handler for finished dumps. there will be no more new dumps. need to finish all we have and stop process. + trap 'dump_was_finished=1' SIGUSR1 + + log "INFO" "Restore loop process started" >> "${background_log}" + local l_folder_to_process + while [ -e "${marker_for_execution}" ] && [ "${stop_restore_loop}" -eq "0" ] ; do + # take the first line sorted by date folders list with specified by regex mask + l_folder_to_process=$(find "${dump_storage_folder}" -mindepth 1 -maxdepth 1 -type d \ + -regex '^'"${dump_storage_folder}"'/'"${dump_folder_prefix}"'.+'"${ready_suffix}"'$' \ + -printf "%TFT%TT\t%p\n" | sort | head -n1 | cut -d$'\t' -f2) + + # if folder with data to process is not found + if [ -z "${l_folder_to_process}" ] ; then + # and if dump was finished - exit the loop. work done. + [ -n "${dump_was_finished}" ] && break + log "INFO" "No folder to process. Sleep for a \"${delay_between_checks_new_folder}\" seconds" >> "${background_log}" + sleep "${delay_between_checks_new_folder}" + continue + fi + # run dump + restore_data "${victoria_metrics_endpoint}" "${l_folder_to_process}" + done + [ ! -e "${marker_for_execution}" ] && log "INFO" "Exit from loop because of marker file not exist anymore \"${marker_for_execution}\"" >> "${background_log}" + log "INFO" "Restore loop process finished" >> "${background_log}" } + + + +# ============================================================================== setup Ctrl+C handler for a dump +function exit_dumploop() { + # increment count of keypres + ((stop_dump_loop++)) + # print newline + echo >&2 + log "WARNING" "Ctrl+C pressed \"${stop_dump_loop}\" time(s)" >&2 + if [ -z "${dump_status}" ] ; then + if [ -n "${stop_dump_loop}" ] && [ ! "${stop_dump_loop}" -le "1" ] ; then + log "WARNING" "Finishing dump loop" >&2 + else + log "WARNING" "once - ignoring..." >&2 + fi + fi + # if we press 3 times and more - we send Ctrl+C to background process + if [ "${stop_dump_loop}" -ge "3" ] ; then + log "WARNING" "Third and more Ctrl+C sending to the background process \"${background_process_pid}\"" >&2 + local l_result + if ! 
l_result=$(kill -SIGINT "${background_process_pid}" 2>&1) ; then + log "WARNING" "kill returned error: \"${l_result}\"" >&2 + fi + else + log "WARNING" "(${stop_dump_loop}/3) Third and more Ctrl+C will be send to background process" >&2 + fi +} +stop_dump_loop=0 +trap exit_dumploop SIGINT + +# create the marker file +# while this file exist - background process will wait for a new folders to upload +touch "${marker_for_execution}" + +# ============================================================================== start background restore loop +restore_loop "${victoria_metrics_endpoint}" & +background_process_pid="${!}" +log "DEBUG" "background process pid:[${background_process_pid}]" >&2 +log "DEBUG" "Current pid:[${$}]" >&2 + +# save pid and background pid - just to have ability to monitor outside +echo -e "${$}\t${background_process_pid}" > "${marker_for_execution}" + +# ============================================================================== RUN MAIN DUMP LOOP # Loop through dates -while [[ "$start_date" < "$end_date" ]]; do - # trim : from date - volume_subfix=$(echo "${start_date}" | tr -d :) - # Create Docker volume for this iteration - docker volume create "opentsdb-data-${volume_subfix}" +while [[ "${start_date}" < "${end_date}" ]] && [ "${stop_dump_loop}" -le "1" ] ; do + take_next_dump="" + # take free inodes and disk space + read -r free_inodes free_space < <(awk '$1~/^[0-9]+$/{print $1" "$2; exit}' < <(df --output=iavail,avail "${dump_storage_folder}")) + if [ -z "${free_space}" ] ; then + log "ERROR" "Can't get free disk space for \"${dump_storage_folder}\"" >&2 + stop_dump_loop=1 + else + # we use bc because of possille huge numbers + if [ -n "$("${bc_cmd}" <<<"if(${free_inodes}>${min_inodes}){print \"OK\n\"}")" ] ; then #" + # take a size of folders by prefix mask + folder_list=$(find "${dump_storage_folder}" -mindepth 1 -maxdepth 1 -type d -regex '^'"${dump_storage_folder}"'/'"${dump_folder_prefix}"'.+$' -exec du -sb {} \;) + # take a bigest folder size if exist and minimal if less or not defined + bigest_folder=$(sort -nrk1,1 <<<"${folder_list}" | awk -v min_free_space="${min_free_space}" '$1~/^[0-9]+$/{a=$1; exit}END{if((!a)||(a${free_space}){print \"yes\n\"}") #" + fi + fi + if [ -n "${take_next_dump}" ] ; then + # Calculate end date for this iteration (value set by reference) + interval_end_date=$("${date_cmd}" -d "${start_date} ${increment}" "+${interval_format}") - # Calculate end date for this iteration - increment_date interval_end_date + dump_data "${start_date}" "${metrics_prefix}" "${interval_end_date}" "${opentsdb_endpoint}" "${concurrent_jobs}" "${query_frame_size}" - echo "Dumping data from ${start_date} to ${interval_end_date}" - dump_data "${start_date}" "${metrics_prefix}" "${interval_end_date}" "${opentsdb_endpoint}" "${volume_subfix}" "${concurrent_jobs}" "${query_frame_size}" + # Next iteration will start from current interval_end_date + start_date="${interval_end_date}" + else + log "INFO" "Dump suspended. Waiting for a space or inodes cleanup..." 
+ log "INFO" "Free space \"${free_space}\", free inodes \"${free_inodes}\" for \"${dump_storage_folder}\"" + sleep "${delay_between_checks_new_folder}" + fi - echo "Restoring data from ${start_date} to ${interval_end_date} in background" - restore_data "${victoria_metrics_endpoint}" "${volume_subfix}" & + # printout tail of the background log + if [ -n "${show_background_log}" ] && [ -e "${background_log}" ] && [ "$(stat -c%s "${background_log}")" -gt "0" ] ; then + log "INFO" "tail from the log \"${_c_yellow_}${background_log}${_c_none_}\" of the background process \"$(jobs -p)\"" + tail -n 5 "${background_log}" | sed 's/^./[LOG] &/g' + fi +done - # Increment date by time interval - increment_date start_date + +log "INFO" "Dump loop finished." +log "INFO" "Sending SIGUSR1 to background process \"${background_process_pid}\"" +# set variable for Ctrl+C handler +dump_status="finished" +# send to background process info about finished dumps. it should stop when it fill not find folders to process +if ! result=$(kill -USR1 "${background_process_pid}" 2>&1) ; then + log "WARNING" "kill returned error: \"${result}\"" >&2 +fi + + +log "INFO" "Waiting for a background process \"${background_process_pid}\" to finish..." +while true ; do + wait -n "${background_process_pid}" >/dev/null 2>&1 + exitcode="${?}" + # exit code 127 - means process not found + [ "${exitcode}" -eq "127" ] && break done -wait +if [ -e "${marker_for_execution}" ] ; then + log "INFO" "Remove marker file \"${marker_for_execution}\" to inform background process about all dumps are finished" + rm "${marker_for_execution}" +else + log "WARNING" "Strange. The marker file \"${marker_for_execution}\" wasn't found at this point. Somebody removed it." >&2 +fi + +log "INFO" "Finished." From b761f52ad704b57512605309be3df7cb7e7d0d09 Mon Sep 17 00:00:00 2001 From: Dmitrii Beliakov Date: Tue, 28 Nov 2023 15:32:51 +0100 Subject: [PATCH 21/27] Remove the deprecated authentication plugin for the MySQL container. --- confd/templates/docker-compose/docker-compose.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/confd/templates/docker-compose/docker-compose.tmpl b/confd/templates/docker-compose/docker-compose.tmpl index 3149b8bd91f..da48eb182fd 100644 --- a/confd/templates/docker-compose/docker-compose.tmpl +++ b/confd/templates/docker-compose/docker-compose.tmpl @@ -127,7 +127,7 @@ services: mysql_db: hostname: mysql.pendev image: mysql:8.0.34 - command: --default-authentication-plugin=mysql_native_password + command: --default-authentication-plugin=caching_sha2_password volumes: - ./docker/mysql/setup_data:/docker-entrypoint-initdb.d - sql_data:/var/lib/mysql From 743f49ae92f88dd58cbb09f5e8168335dae17eea Mon Sep 17 00:00:00 2001 From: Dmitrii Beliakov Date: Thu, 30 Nov 2023 14:28:05 +0100 Subject: [PATCH 22/27] Adjust ordering of HA-flow history actions. 
--- .../hibernate/entities/history/HibernateHaFlowEvent.java | 8 +++++++- .../HibernateHistoryHaFlowEventRepository.java | 2 +- .../persistence/ferma/frames/HaFlowEventFrame.java | 3 ++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src-java/kilda-persistence-hibernate/src/main/java/org/openkilda/persistence/hibernate/entities/history/HibernateHaFlowEvent.java b/src-java/kilda-persistence-hibernate/src/main/java/org/openkilda/persistence/hibernate/entities/history/HibernateHaFlowEvent.java index e4ad152f786..1725fe17bd1 100644 --- a/src-java/kilda-persistence-hibernate/src/main/java/org/openkilda/persistence/hibernate/entities/history/HibernateHaFlowEvent.java +++ b/src-java/kilda-persistence-hibernate/src/main/java/org/openkilda/persistence/hibernate/entities/history/HibernateHaFlowEvent.java @@ -70,8 +70,14 @@ public class HibernateHaFlowEvent extends EntityBase implements HaFlowEventData @Column(name = "details") private String details; + /** + * The ordering relies on grouping messages in topologies: history messages for the same correlation ID arrive to + * the same worker and processed sequentially within a single operation (see fieldsGrouping in topology builders). + * However, timestamps are not generated by DB, so assumptions about the relation between timestamp and + * id cannot be made. + */ @OneToMany(mappedBy = "haFlowEvent", cascade = CascadeType.ALL) - @OrderBy("timestamp") + @OrderBy("timestamp, id") @LazyCollection(LazyCollectionOption.FALSE) private List eventActions = new ArrayList<>(); diff --git a/src-java/kilda-persistence-hibernate/src/main/java/org/openkilda/persistence/hibernate/repositories/HibernateHistoryHaFlowEventRepository.java b/src-java/kilda-persistence-hibernate/src/main/java/org/openkilda/persistence/hibernate/repositories/HibernateHistoryHaFlowEventRepository.java index e50d0d30aee..70a8cbb44fa 100644 --- a/src-java/kilda-persistence-hibernate/src/main/java/org/openkilda/persistence/hibernate/repositories/HibernateHistoryHaFlowEventRepository.java +++ b/src-java/kilda-persistence-hibernate/src/main/java/org/openkilda/persistence/hibernate/repositories/HibernateHistoryHaFlowEventRepository.java @@ -62,7 +62,7 @@ public List findByHaFlowIdAndTimeFrame(String haFlowId, () -> fetch(haFlowId, timeFrom, timeTo, maxCount).stream() .map(HaFlowEvent::new) .collect(Collectors.toList())); - // fetch does ordering [1,2,3,4,5] and limit to maxCount (let's say top 3) [1,2,3] + // fetch does the ordering [1,2,3,4,5] and limit to maxCount (let's say top 3) [1,2,3] // then we reverse the collection [3,2,1]. 
// This is different from having the opposite ordering in the query: order [5,4,3,2,1] and top 3: [5,4,3] Collections.reverse(results); diff --git a/src-java/kilda-persistence-tinkerpop/src/main/java/org/openkilda/persistence/ferma/frames/HaFlowEventFrame.java b/src-java/kilda-persistence-tinkerpop/src/main/java/org/openkilda/persistence/ferma/frames/HaFlowEventFrame.java index f64fd1ef671..2c3e887e29e 100644 --- a/src-java/kilda-persistence-tinkerpop/src/main/java/org/openkilda/persistence/ferma/frames/HaFlowEventFrame.java +++ b/src-java/kilda-persistence-tinkerpop/src/main/java/org/openkilda/persistence/ferma/frames/HaFlowEventFrame.java @@ -90,7 +90,8 @@ public List getEventActions() { .hasLabel(HaFlowEventActionFrame.FRAME_LABEL) .has(HaFlowEventActionFrame.TASK_ID_PROPERTY, getTaskId())) .toListExplicit(HaFlowEventActionFrame.class).stream() - .sorted(Comparator.comparing(HaFlowEventActionFrame::getTimestamp)) + .sorted(Comparator.comparing(HaFlowEventActionFrame::getTimestamp) + .thenComparing(x -> x.getId())) .map(HaFlowEventAction::new) .collect(Collectors.toList()); } From f46445f44ee847bb26ea2920831b43a9d5307bc6 Mon Sep 17 00:00:00 2001 From: Yuliia Miroshnychenko Date: Mon, 4 Dec 2023 17:53:54 +0100 Subject: [PATCH 23/27] [TEST]: #5504: Storm: Updating network topology manipulation --- .../functionaltests/helpers/DockerHelper.groovy | 8 ++++++++ .../helpers/WfmManipulator.groovy | 16 ++++++++++++++++ .../helpers/model/ContainerName.groovy | 3 ++- .../spec/xresilience/StormLcmSpec.groovy | 11 ++++++++--- 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/DockerHelper.groovy b/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/DockerHelper.groovy index 7aa99edc38b..9c3b0c65a7c 100644 --- a/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/DockerHelper.groovy +++ b/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/DockerHelper.groovy @@ -59,4 +59,12 @@ class DockerHelper { private String getNetworkName() { dockerClient.listNetworks()*.name().find { it.contains('_default') && it.contains('kilda') } } + + String execute(String containerId, String [] command) { + def execCreation = dockerClient.execCreate(containerId, command, + DockerClient.ExecCreateParam.attachStdout(), DockerClient.ExecCreateParam.attachStderr()) + def output = dockerClient.execStart(execCreation.id()) + def execOutput = output.readFully() + return execOutput + } } diff --git a/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/WfmManipulator.groovy b/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/WfmManipulator.groovy index ddb1201c0a2..6782942e5f3 100644 --- a/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/WfmManipulator.groovy +++ b/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/WfmManipulator.groovy @@ -1,5 +1,8 @@ package org.openkilda.functionaltests.helpers +import java.util.regex.Pattern + +import static org.openkilda.functionaltests.helpers.model.ContainerName.STORM import static org.openkilda.functionaltests.helpers.model.ContainerName.WFM import com.spotify.docker.client.DockerClient @@ -39,14 +42,27 @@ class WfmManipulator { } } + String getStormActualNetworkTopology() { + String stormUIContainerId = dockerHelper."get container by 
name"(STORM).id() + String[] topologiesList = ["sh", "-c", "PATH=\${PATH}:/opt/storm/bin; storm list | grep network"] + String commandOutput = dockerHelper.execute(stormUIContainerId, topologiesList) + Pattern pattern = ~/network\w*/ + assert pattern.matcher(commandOutput).find(), "Something went wrong, network topology name has not been retrieved: \n $commandOutput" + //in the blue/green mode, all topologies have a name format: topologyName_mode (mode: blue/green, ex.: network_blue) + // to deploy/kill topology use the format topologyName-mode (ex. network-blue) + return pattern.matcher(commandOutput).findAll().first().toString().replace("_", "-") + } + def killTopology(String topologyName) { log.warn "Killing wfm $topologyName topology" manipulateTopology("kill", topologyName) + log.info "WFM $topologyName topology has been deleted" } def deployTopology(String topologyName) { log.warn "Deploying wfm $topologyName topology" manipulateTopology("deploy", topologyName) + log.info("WFM $topologyName topology has been deployed") } private def manipulateTopology(String action, String topologyName) { diff --git a/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/model/ContainerName.groovy b/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/model/ContainerName.groovy index 97da2d1a621..41a460e509f 100644 --- a/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/model/ContainerName.groovy +++ b/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/model/ContainerName.groovy @@ -3,7 +3,8 @@ package org.openkilda.functionaltests.helpers.model enum ContainerName { GRPC("grpc-speaker"), GRPC_STUB("grpc-stub"), - WFM("wfm") + WFM("wfm"), + STORM("storm-ui") private final String id; diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/xresilience/StormLcmSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/xresilience/StormLcmSpec.groovy index 98abb763851..ef181362064 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/xresilience/StormLcmSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/xresilience/StormLcmSpec.groovy @@ -21,6 +21,7 @@ import org.openkilda.testing.Constants import org.springframework.beans.factory.annotation.Value import spock.lang.Ignore import spock.lang.Isolated +import spock.lang.Issue import spock.lang.Narrative import spock.lang.Shared @@ -110,10 +111,14 @@ class StormLcmSpec extends HealthCheckSpecification { } @Ignore + @Issue("https://github.com/telstra/open-kilda/issues/5506 (ISL between deactivated switches is in a DISCOVERED state)") @Tags(LOW_PRIORITY) def "System's able to fail an ISL if switches on both ends go offline during restart of network topology"() { + given: "Actual network topology" + String networkTopologyName = wfmManipulator.getStormActualNetworkTopology() + when: "Kill network topology" - wfmManipulator.killTopology("network") + wfmManipulator.killTopology(networkTopologyName) and: "Disconnect switches on both ends of ISL" def islUnderTest = topology.islsForActiveSwitches.first() @@ -121,7 +126,7 @@ class StormLcmSpec extends HealthCheckSpecification { def dstBlockData = lockKeeper.knockoutSwitch(islUnderTest.dstSwitch, RW) and: "Deploy network topology back" - wfmManipulator.deployTopology("network") + 
wfmManipulator.deployTopology(networkTopologyName) def networkDeployed = true TimeUnit.SECONDS.sleep(45) //after deploy topology needs more time to actually begin working @@ -139,7 +144,7 @@ class StormLcmSpec extends HealthCheckSpecification { } cleanup: - !networkDeployed && wfmManipulator.deployTopology("network") + networkTopologyName && !networkDeployed && wfmManipulator.deployTopology(networkTopologyName) srcBlockData && lockKeeper.reviveSwitch(islUnderTest.srcSwitch, srcBlockData) dstBlockData && lockKeeper.reviveSwitch(islUnderTest.dstSwitch, dstBlockData) Wrappers.wait(discoveryTimeout + WAIT_OFFSET * 3) { From e94403d7e9ca4fcaa67107d1fcf1a7de1be35179 Mon Sep 17 00:00:00 2001 From: pkazlenka Date: Mon, 4 Dec 2023 13:42:45 +0100 Subject: [PATCH 24/27] [TEST] Fixed tests after changes in history API pt.2 Implements #5390 * Increased timeout in flaky reroute test * Adhered expected flow history entry in another test --- .../openkilda/functionaltests/spec/flows/AutoRerouteSpec.groovy | 2 +- .../openkilda/functionaltests/spec/flows/FlowHistorySpec.groovy | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/AutoRerouteSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/AutoRerouteSpec.groovy index adf0e959b00..e51f32d4256 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/AutoRerouteSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/AutoRerouteSpec.groovy @@ -826,7 +826,7 @@ class AutoRerouteIsolatedSpec extends HealthCheckSpecification { then: "System tries to reroute a flow with transit switch" def flowPathMap = [(firstFlow.flowId): firstFlowMainPath, (secondFlow.flowId): secondFlowPath] - wait(WAIT_OFFSET * 2) { + wait(WAIT_OFFSET * 3) { def firstFlowHistory = flowHelper.getHistoryEntriesByAction(firstFlow.flowId, REROUTE_ACTION) assert firstFlowHistory.last().payload.find { it.action == REROUTE_FAIL } //check that system doesn't retry to reroute the firstFlow (its src is down, no need to retry) diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowHistorySpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowHistorySpec.groovy index ca2212692ff..eed1df5336b 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowHistorySpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowHistorySpec.groovy @@ -374,7 +374,7 @@ class FlowHistorySpec extends HealthCheckSpecification { and: "The root cause('Switch is not active') is registered in flow history" Wrappers.wait(WAIT_OFFSET) { def flowHistory = flowHelper.getEarliestHistoryEntryByAction(flow.flowId, REROUTE_ACTION) - assert flowHistory.payload[0].action == "Started flow validation" + assert flowHistory.payload[0].action == "Flow rerouting operation has been started." 
assert flowHistory.payload[1].action == "ValidateFlowAction failed: Flow's $flow.flowId src switch is not active" assert flowHistory.payload[2].action == REROUTE_FAIL } From 069d5df649f452d4aa0d4193c6f31617e671b3a6 Mon Sep 17 00:00:00 2001 From: pkazlenka Date: Fri, 8 Dec 2023 10:30:22 +0100 Subject: [PATCH 25/27] #5420: [TEST] Ignore failing WB5164 mirror check * Mirrored packets on traffgen are not checked until we find the cause of the problem with traffgen. --- .../functionaltests/spec/flows/MirrorEndpointsSpec.groovy | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/MirrorEndpointsSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/MirrorEndpointsSpec.groovy index 2a468869287..e0c6dc86dc2 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/MirrorEndpointsSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/MirrorEndpointsSpec.groovy @@ -173,7 +173,8 @@ class MirrorEndpointsSpec extends HealthCheckSpecification { } and: "Traffic is also received at the mirror point (check only if second tg available)" - if (mirrorTg) { + //https://github.com/telstra/open-kilda/issues/5420 + if (mirrorTg && !swPair.src.isWb5164()) { assert mirrorPortStats.get().rxPackets - rxPacketsBefore > 0 } @@ -310,7 +311,8 @@ class MirrorEndpointsSpec extends HealthCheckSpecification { def mirrorPortStats = mirrorTg ? new TraffgenStats(traffExam, mirrorTg, [mirrorEndpoint.sinkEndpoint.vlanId]) : null def rxPacketsBefore = mirrorPortStats?.get()?.rxPackets verifyTraffic(traffExam, flow, mirrorDirection) - if (mirrorTg) { + //https://github.com/telstra/open-kilda/issues/5420 + if (mirrorTg && !swPair.src.isWb5164()) { assert mirrorPortStats.get().rxPackets - rxPacketsBefore > 0 } From 8389892228c3a1e32a04d0cb495ea0d41651f0c3 Mon Sep 17 00:00:00 2001 From: pkazlenka Date: Fri, 24 Nov 2023 12:02:12 +0100 Subject: [PATCH 26/27] [TEST] Refactoring the way to choose switch pairs for test * Topology helper methods getRandomSwitchPair(), getSingleSwitchPair(), getAllSingleSwitchPairs(), getNeighboringSwitchPair(), getNotNeighboringSwitchPair(), getAllNeighboringSwitchPairs() (partially) were replaced with methods from SwitchPairs class. 
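* Usage sketch of the new fluent selection style (the chain below mirrors the
  ConfigurationSpec change in this patch and is shown only to illustrate the
  API; `switchPairs` is the Spring-injected SwitchPairs bean added to
  BaseSpecification):

      // pick a random neighbouring pair whose both switches support VXLAN;
      // per the SwitchPairs class doc, the chain skips the test when no pair matches
      def switchPair = switchPairs.all()
              .neighbouring()
              .withBothSwitchesVxLanEnabled()
              .random()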
--- .../helpers/TopologyHelper.groovy | 56 +--------- .../helpers/model/SwitchPairs.groovy | 101 +++++++++++++++--- .../functionaltests/BaseSpecification.groovy | 4 + .../configuration/ConfigurationSpec.groovy | 9 +- .../spec/flows/AutoRerouteSpec.groovy | 33 +++--- .../spec/flows/BandwidthSpec.groovy | 14 +-- .../spec/flows/FlowCrudSpec.groovy | 12 +-- .../spec/flows/FlowCrudV1Spec.groovy | 2 +- .../spec/flows/FlowDiversitySpec.groovy | 2 +- .../spec/flows/FlowLoopSpec.groovy | 12 +-- .../spec/flows/FlowPingSpec.groovy | 2 +- .../spec/flows/FlowSyncSpec.groovy | 2 +- .../flows/FlowValidationNegativeSpec.groovy | 26 ++--- .../spec/flows/PartialUpdateSpec.groovy | 2 +- .../spec/flows/ProtectedPathSpec.groovy | 2 +- .../spec/flows/ProtectedPathV1Spec.groovy | 2 +- .../spec/flows/QinQFlowSpec.groovy | 2 +- .../spec/flows/SwapEndpointSpec.groovy | 26 ++--- .../spec/flows/yflows/YFlowRerouteSpec.groovy | 12 +-- .../spec/links/LinkSpec.groovy | 4 +- .../spec/network/PathCheckSpec.groovy | 14 +-- .../spec/network/PathsSpec.groovy | 12 +-- .../spec/switches/LagPortSpec.groovy | 10 +- .../spec/switches/SwitchActivationSpec.groovy | 2 +- .../spec/switches/SwitchSyncSpec.groovy | 2 +- .../spec/xresilience/ContentionSpec.groovy | 4 +- .../spec/xresilience/StormLcmSpec.groovy | 2 +- .../spec/EnduranceSpec.groovy | 2 +- .../spec/EnduranceV2Spec.groovy | 3 +- 29 files changed, 199 insertions(+), 177 deletions(-) diff --git a/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/TopologyHelper.groovy b/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/TopologyHelper.groovy index c360a4e5ac8..7e9765e913f 100644 --- a/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/TopologyHelper.groovy +++ b/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/TopologyHelper.groovy @@ -41,62 +41,14 @@ class TopologyHelper { @Autowired FloodlightsHelper flHelper - /** - * Get a switch pair of random switches. - * - * @param forceDifferent whether to exclude the picked src switch when looking for dst switch - * @deprecated Use new mechanism from org.openkilda.functionaltests.helpers.model.SwitchPairs class - */ - @Deprecated - Tuple2 getRandomSwitchPair(boolean forceDifferent = true) { - def randomSwitch = { List switches -> - switches[new Random().nextInt(switches.size())] - } - def src = randomSwitch(topology.activeSwitches) - def dst = randomSwitch(forceDifferent ? 
topology.activeSwitches - src : topology.activeSwitches) - return new Tuple2(src, dst) + List getAllSwitchPairs(boolean includeReverse = true) { + return getSwitchPairs(includeReverse) } - SwitchPairs getAllSwitchPairs(boolean includeReverse = false) { - return new SwitchPairs(getSwitchPairs(includeReverse)) - } - - /** - * @deprecated Use new mechanism from org.openkilda.functionaltests.helpers.model.SwitchPairs class - */ - @Deprecated - SwitchPair getSingleSwitchPair() { - return SwitchPair.singleSwitchInstance(topology.activeSwitches.first()) - } - - /** - * @deprecated Use new mechanism from org.openkilda.functionaltests.helpers.model.SwitchPairs class - */ - @Deprecated List getAllSingleSwitchPairs() { return topology.activeSwitches.collect { SwitchPair.singleSwitchInstance(it) } } - /** - * @deprecated Use new mechanism from org.openkilda.functionaltests.helpers.model.SwitchPairs class - */ - @Deprecated - SwitchPair getNeighboringSwitchPair() { - getSwitchPairs().find { - it.paths.min { it.size() }?.size() == 2 - } - } - - /** - * @deprecated Use new mechanism from org.openkilda.functionaltests.helpers.model.SwitchPairs class - */ - @Deprecated - SwitchPair getNotNeighboringSwitchPair() { - getSwitchPairs().find { - it.paths.min { it.size() }?.size() > 2 - } - } - /** * @deprecated Use new mechanism from org.openkilda.functionaltests.helpers.model.SwitchPairs class */ @@ -208,9 +160,9 @@ class TopologyHelper { } SwitchTriplet findSwitchTripletWithSharedEpInTheMiddleOfTheChain() { - def pairSharedEpAndEp1 = getAllSwitchPairs().neighbouring().random() + def pairSharedEpAndEp1 = new SwitchPairs(getAllSwitchPairs()).neighbouring().random() //shared endpoint should be in the middle of the switches chain to deploy ha-flow without shared path - def pairEp1AndEp2 = getAllSwitchPairs().neighbouring().excludePairs([pairSharedEpAndEp1]).includeSwitch(pairSharedEpAndEp1.src).random() + def pairEp1AndEp2 = new SwitchPairs(getAllSwitchPairs()).neighbouring().excludePairs([pairSharedEpAndEp1]).includeSwitch(pairSharedEpAndEp1.src).random() Switch thirdSwitch = pairSharedEpAndEp1.src == pairEp1AndEp2.dst ? pairEp1AndEp2.src : pairEp1AndEp2.dst return switchTriplets.find { it.shared.dpId == pairSharedEpAndEp1.src.dpId diff --git a/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/model/SwitchPairs.groovy b/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/model/SwitchPairs.groovy index 4cb86e5ac46..1aaac3b5c44 100644 --- a/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/model/SwitchPairs.groovy +++ b/src-java/testing/functional-tests/src/main/groovy/org/openkilda/functionaltests/helpers/model/SwitchPairs.groovy @@ -1,54 +1,85 @@ package org.openkilda.functionaltests.helpers.model +import org.openkilda.functionaltests.helpers.SwitchHelper +import org.openkilda.functionaltests.helpers.TopologyHelper +import org.springframework.beans.factory.annotation.Autowired +import org.springframework.context.annotation.Scope +import org.springframework.stereotype.Component + import static org.junit.jupiter.api.Assumptions.assumeFalse import org.openkilda.testing.model.topology.TopologyDefinition.Switch +import static org.openkilda.model.SwitchFeature.NOVIFLOW_COPY_FIELD +import static org.springframework.beans.factory.config.ConfigurableBeanFactory.SCOPE_PROTOTYPE + /** * Class which simplifies search for corresponding switch pair. 
Just chain existing methods to combine requirements - * Usage: topologyHelper.getAllSwitchPairs() + * Usage: switchPairs.all() * .nonNeighouring() * .withAllTraffgerns() * .random() * Also it eliminates need to verify if no switch can be found (skips test immediately) */ + +@Component +@Scope(SCOPE_PROTOTYPE) class SwitchPairs { - private List switchPairs + List switchPairs + @Autowired + SwitchHelper switchHelper + @Autowired + TopologyHelper topologyHelper SwitchPairs(List switchPairs) { this.switchPairs = switchPairs } + SwitchPairs all(Boolean includeReverse = true) { + switchPairs = topologyHelper.getAllSwitchPairs(includeReverse) + return this + } + + SwitchPairs singleSwitch() { + switchPairs = topologyHelper.getAllSingleSwitchPairs() + return this + } + SwitchPairs withAtLeastNNonOverlappingPaths(int nonOverlappingPaths) { - return new SwitchPairs(switchPairs.findAll { - it.paths.unique(false) { a, b -> a.intersect(b) == [] ? 1 : 0 }.size() >= nonOverlappingPaths}) + switchPairs = switchPairs.findAll { + it.paths.unique(false) { a, b -> a.intersect(b) == [] ? 1 : 0 }.size() >= nonOverlappingPaths + } + return this } SwitchPairs withShortestPathShorterThanOthers() { - return new SwitchPairs(switchPairs.findAll {it.getPaths()[0].size() != it.getPaths()[1].size()}) + switchPairs = switchPairs.findAll { it.getPaths()[0].size() != it.getPaths()[1].size() } + return this } SwitchPairs nonNeighbouring() { - return new SwitchPairs(switchPairs.findAll { it.paths.min { it.size() }?.size() > 2}) + switchPairs = switchPairs.findAll { it.paths.min { it.size() }?.size() > 2 } + return this } + SwitchPairs neighbouring() { - return new SwitchPairs(switchPairs.findAll { it.paths.min { it.size() }?.size() == 2}) + switchPairs = switchPairs.findAll { it.paths.min { it.size() }?.size() == 2 } + return this } SwitchPairs excludePairs(List excludePairs) { - return new SwitchPairs(switchPairs.findAll { !excludePairs.contains(it) }) + switchPairs = switchPairs.findAll { !excludePairs.contains(it) } + return this } SwitchPairs sortedByShortestPathLengthAscending() { - return new SwitchPairs(switchPairs.sort {it.paths.min { it.size() }?.size() > 2}) - } - - SwitchPairs sortedBySmallestPathsAmount() { - return new SwitchPairs(switchPairs.sort{it.paths.size()}) + switchPairs = switchPairs.sort { it.paths.min { it.size() }?.size() > 2 } + return this } SwitchPair random() { - return new SwitchPairs(switchPairs.shuffled()).first() + switchPairs = switchPairs.shuffled() + return this.first() } SwitchPair first() { @@ -57,14 +88,52 @@ class SwitchPairs { } SwitchPairs includeSwitch(Switch sw) { - return new SwitchPairs(switchPairs.findAll { it.src == sw || it.dst == sw}) + switchPairs = switchPairs.findAll { it.src == sw || it.dst == sw } + return this } SwitchPairs excludeSwitches(List switchesList) { - return new SwitchPairs(switchPairs.findAll { !(it.src in switchesList) || !(it.dst in switchesList)}) + switchPairs = switchPairs.findAll { !(it.src in switchesList) || !(it.dst in switchesList) } + return this } List collectSwitches() { switchPairs.collectMany { return [it.src, it.dst] }.unique() } + + SwitchPairs withAtLeastNTraffgensOnSource(int traffgensConnectedToSource) { + switchPairs = switchPairs.findAll { it.getSrc().getTraffGens().size() >= traffgensConnectedToSource } + return this + } + + SwitchPairs withBothSwitchesVxLanEnabled() { + switchPairs = switchPairs.findAll { [it.src, it.dst].every { sw -> switchHelper.isVxlanEnabled(sw.dpId) } } + return this + } + + SwitchPairs withIslRttSupport() { + 
this.assertAllSwitchPairsAreNeighbouring() + switchPairs = switchPairs.findAll { [it.src, it.dst].every { it.features.contains(NOVIFLOW_COPY_FIELD) } } + return this + } + + SwitchPairs withExactlyNIslsBetweenSwitches(int expectedIslsBetweenSwitches) { + this.assertAllSwitchPairsAreNeighbouring() + switchPairs = switchPairs.findAll { it.paths.findAll { it.size() == 2 }.size() == expectedIslsBetweenSwitches } + return this + } + + SwitchPairs withMoreThanNIslsBetweenSwitches(int expectedMinimalIslsBetweenSwitches) { + this.assertAllSwitchPairsAreNeighbouring() + switchPairs = switchPairs.findAll { + it.paths.findAll { it.size() == 2 } + .size() > expectedMinimalIslsBetweenSwitches + } + return this + } + + private void assertAllSwitchPairsAreNeighbouring() { + assert switchPairs.size() == this.neighbouring().getSwitchPairs().size(), + "This method is applicable only to the neighbouring switch pairs" + } } diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/BaseSpecification.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/BaseSpecification.groovy index 5aa26372afa..eb5ccc0272c 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/BaseSpecification.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/BaseSpecification.groovy @@ -1,5 +1,7 @@ package org.openkilda.functionaltests +import org.openkilda.functionaltests.helpers.model.SwitchPairs + import static groovyx.gpars.GParsPool.withPool import static org.junit.jupiter.api.Assumptions.assumeTrue @@ -69,6 +71,8 @@ class BaseSpecification extends Specification { StatsHelper statsHelper @Autowired @Shared LabService labService + @Autowired @Shared + SwitchPairs switchPairs @Value('${spring.profiles.active}') @Shared String profile diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/configuration/ConfigurationSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/configuration/ConfigurationSpec.groovy index 77609c00b74..c12e1e0b8c7 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/configuration/ConfigurationSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/configuration/ConfigurationSpec.groovy @@ -35,12 +35,13 @@ class ConfigurationSpec extends HealthCheckSpecification { @Shared FlowEncapsulationType defaultEncapsulationType = FlowEncapsulationType.TRANSIT_VLAN + def "System takes into account default flow encapsulation type while creating a flow"() { when: "Create a flow without encapsulation type" - def switchPair = topologyHelper.getAllNeighboringSwitchPairs().find { swP -> - [swP.src, swP.dst].every { sw -> switchHelper.isVxlanEnabled(sw.dpId) } - } - assumeTrue(switchPair != null, "Unable to find required switch pair in topology") + def switchPair = switchPairs.all() + .neighbouring() + .withBothSwitchesVxLanEnabled() + .random() def flow1 = flowHelperV2.randomFlow(switchPair) flow1.encapsulationType = null flowHelperV2.addFlow(flow1) diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/AutoRerouteSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/AutoRerouteSpec.groovy index e51f32d4256..57c0de987f9 100644 --- 
a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/AutoRerouteSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/AutoRerouteSpec.groovy @@ -52,7 +52,7 @@ class AutoRerouteSpec extends HealthCheckSpecification { @IterationTag(tags = [TOPOLOGY_DEPENDENT], iterationNameRegex = /vxlan/) def "Flow is rerouted when one of the #description flow ISLs fails"() { given: "A flow with one alternative path at least" - def data = flowData(topologyHelper.getAllNeighboringSwitchPairs(), 1) + def data = flowData(switchPairs.all().neighbouring().getSwitchPairs(), 1) FlowRequestV2 flow = data[0] def allFlowPaths = data[1] flowHelperV2.addFlow(flow) @@ -491,13 +491,10 @@ class AutoRerouteSpec extends HealthCheckSpecification { @Tags(HARDWARE) def "Flow in 'UP' status is not rerouted after switchUp event"() { given: "Two active neighboring switches which support round trip latency" - def switchPair = topologyHelper.getAllNeighboringSwitchPairs().find { swP -> - swP.paths.findAll { path -> - path.size() == 2 && pathHelper.getInvolvedSwitches(path).every { - it.features.contains(SwitchFeature.NOVIFLOW_COPY_FIELD) - } - } - } ?: assumeTrue(false, "No suiting switches found.") + def switchPair = switchPairs.all() + .neighbouring() + .withIslRttSupport() + .random() and: "A flow on the given switch pair" def flow = flowHelperV2.randomFlow(switchPair) @@ -547,10 +544,10 @@ class AutoRerouteSpec extends HealthCheckSpecification { def "Flow is not rerouted when switchUp event appear for a switch which is not related to the flow"() { given: "Given a flow in DOWN status on neighboring switches" - def switchPair = topologyHelper.getAllNeighboringSwitchPairs().find { - it.paths.findAll { it.size() == 2 }.size() == 1 - } ?: assumeTrue(false, "No suiting switches found") - + def switchPair = switchPairs.all() + .neighbouring() + .withExactlyNIslsBetweenSwitches(1) + .random() def flowPath = switchPair.paths.min { it.size() } def flow = flowHelperV2.randomFlow(switchPair) flowHelperV2.addFlow(flow) @@ -715,7 +712,8 @@ triggering one more reroute of the current path" } def noIntermediateSwitchFlow(int minAltPathsCount = 0, boolean getAllPaths = false) { - def flowWithPaths = getFlowWithPaths(topologyHelper.getAllNeighboringSwitchPairs(), minAltPathsCount) + def flowWithPaths = getFlowWithPaths(switchPairs.all().neighbouring().getSwitchPairs(), + minAltPathsCount) return getAllPaths ? 
flowWithPaths : flowWithPaths[0] } @@ -754,9 +752,10 @@ class AutoRerouteIsolatedSpec extends HealthCheckSpecification { def "Flow in 'Down' status is rerouted after switchUp event"() { given: "First switch pair with two parallel links and two available paths" assumeTrue(rerouteDelay * 2 < discoveryTimeout, "Reroute should be completed before link is FAILED") - def switchPair1 = topologyHelper.getAllNeighboringSwitchPairs().find { - it.paths.findAll { it.size() == 2 }.size() > 1 - } ?: assumeTrue(false, "No suiting switches found for the first flow") + def switchPair1 = switchPairs.all() + .neighbouring() + .withMoreThanNIslsBetweenSwitches(1) + .random() // disable auto-reroute on islDiscovery event northbound.toggleFeature(FeatureTogglesDto.builder().flowsRerouteOnIslDiscoveryEnabled(false).build()) @@ -1019,7 +1018,7 @@ Failed to find path with requested bandwidth= ignored" } def noIntermediateSwitchFlow(int minAltPathsCount = 0, boolean getAllPaths = false) { - def flowWithPaths = getFlowWithPaths(topologyHelper.getAllNeighboringSwitchPairs(), minAltPathsCount) + def flowWithPaths = getFlowWithPaths(switchPairs.all().neighbouring().getSwitchPairs(), minAltPathsCount) return getAllPaths ? flowWithPaths : flowWithPaths[0] } diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/BandwidthSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/BandwidthSpec.groovy index 9e4fc0c07e9..91989c20e3a 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/BandwidthSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/BandwidthSpec.groovy @@ -23,7 +23,7 @@ class BandwidthSpec extends HealthCheckSpecification { @Tags(SMOKE) def "Available bandwidth on ISLs changes respectively when creating/updating/deleting a flow"() { given: "Two active not neighboring switches" - def switchPair = topologyHelper.getNotNeighboringSwitchPair() + def switchPair = switchPairs.all().nonNeighbouring().random() when: "Create a flow with a valid bandwidth" def linksBeforeFlowCreate = northbound.getAllLinks() @@ -111,7 +111,7 @@ class BandwidthSpec extends HealthCheckSpecification { def "Unable to exceed bandwidth limit on ISL when creating a flow"() { given: "Two active switches" - def switchPair = topologyHelper.getNeighboringSwitchPair() + def switchPair = switchPairs.all().neighbouring().random() when: "Create a flow with a bandwidth that exceeds available bandwidth on ISL" def involvedBandwidths = [] @@ -135,7 +135,7 @@ class BandwidthSpec extends HealthCheckSpecification { def "Unable to exceed bandwidth limit on ISL when updating a flow"() { given: "Two active switches" - def switchPair = topologyHelper.getNeighboringSwitchPair() + def switchPair = switchPairs.all().neighbouring().random() when: "Create a flow with a valid bandwidth" def maximumBandwidth = 1000 @@ -166,7 +166,7 @@ class BandwidthSpec extends HealthCheckSpecification { def "Able to exceed bandwidth limit on ISL when creating/updating a flow with ignore_bandwidth=true"() { given: "Two active switches" - def switchPair = topologyHelper.getNeighboringSwitchPair() + def switchPair = switchPairs.all().neighbouring().random() when: "Create a flow with a bandwidth that exceeds available bandwidth on ISL (ignore_bandwidth=true)" def linksBeforeFlowCreate = northbound.getAllLinks() @@ -206,7 +206,7 @@ class BandwidthSpec extends HealthCheckSpecification { def 
"Able to update bandwidth to maximum link speed without using alternate links"() { given: "Two active neighboring switches" - def switchPair = topologyHelper.getNeighboringSwitchPair() + def switchPair = switchPairs.all().neighbouring().random() // We need to handle the case when there are parallel links between chosen switches. So we make all parallel // links except the first link not preferable to avoid flow reroute when updating the flow. @@ -247,7 +247,7 @@ class BandwidthSpec extends HealthCheckSpecification { def "System doesn't allow to exceed bandwidth limit on ISL while updating a flow with ignore_bandwidth=false"() { given: "Two active switches" - def switchPair = topologyHelper.getNeighboringSwitchPair() + def switchPair = switchPairs.all().neighbouring().random() when: "Create a flow with a bandwidth that exceeds available bandwidth on ISL (ignore_bandwidth=true)" def linksBeforeFlowCreate = northbound.getAllLinks() @@ -288,7 +288,7 @@ class BandwidthSpec extends HealthCheckSpecification { @Tags([LOW_PRIORITY]) def "Unable to exceed bandwidth limit on ISL when creating a flow [v1 api]"() { given: "Two active switches" - def switchPair = topologyHelper.getNeighboringSwitchPair() + def switchPair = switchPairs.all().neighbouring().random() when: "Create a flow with a bandwidth that exceeds available bandwidth on ISL" def involvedBandwidths = [] diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowCrudSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowCrudSpec.groovy index b4554c04336..4c66762d755 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowCrudSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowCrudSpec.groovy @@ -467,7 +467,7 @@ class FlowCrudSpec extends HealthCheckSpecification { def "Error is returned if there is no available path to #data.isolatedSwitchType switch"() { given: "A switch that has no connection to other switches" - def isolatedSwitch = topologyHelper.notNeighboringSwitchPair.src + def isolatedSwitch = switchPairs.all().nonNeighbouring().random().src def flow = data.getFlow(isolatedSwitch) topology.getBusyPortsForSwitch(isolatedSwitch).each { port -> antiflap.portDown(isolatedSwitch.dpId, port) @@ -568,7 +568,7 @@ class FlowCrudSpec extends HealthCheckSpecification { def "Unable to create a flow with #problem"() { given: "A flow with #problem" - def switchPair = topologyHelper.getNotNeighboringSwitchPair() + def switchPair = switchPairs.all().nonNeighbouring().random() def flow = flowHelperV2.randomFlow(switchPair, false) flow = update(flow) when: "Try to create a flow" @@ -825,7 +825,7 @@ Failed to find path with requested bandwidth=${IMPOSSIBLY_HIGH_BANDWIDTH}/) @Tags(LOW_PRIORITY) def "System allows to set/update description/priority/max-latency for a flow"() { given: "Two active neighboring switches" - def switchPair = topologyHelper.getNeighboringSwitchPair() + def switchPair = switchPairs.all().neighbouring().random() and: "Value for each field" def initPriority = 100 @@ -878,7 +878,7 @@ Failed to find path with requested bandwidth=${IMPOSSIBLY_HIGH_BANDWIDTH}/) def "System doesn't ignore encapsulationType when flow is created with ignoreBandwidth = true"() { given: "Two active switches" - def swPair = topologyHelper.getNeighboringSwitchPair().find { + def swPair = switchPairs.all().neighbouring().getSwitchPairs().find { 
[it.src, it.dst].any { !switchHelper.isVxlanEnabled(it.dpId) } } ?: assumeTrue(false, "Unable to find required switches in topology") @@ -1245,7 +1245,7 @@ types .* or update switch properties and add needed encapsulation type./).matche def "System allows to update single switch flow to multi switch flow"() { given: "A single switch flow with enabled lldp/arp on the dst side" assumeTrue(useMultitable, "This test can be run in multiTable mode due to lldp/arp") - def swPair = topologyHelper.getNeighboringSwitchPair() + def swPair = switchPairs.all().neighbouring().random() def flow = flowHelperV2.singleSwitchFlow(swPair.src) flow.destination.detectConnectedDevices.lldp = true flow.destination.detectConnectedDevices.arp = true @@ -1401,7 +1401,7 @@ types .* or update switch properties and add needed encapsulation type./).matche @Tags(LOW_PRIORITY) def "Unable to update to a flow with maxLatencyTier2 higher as maxLatency)"() { given: "A flow" - def swPair = topologyHelper.getRandomSwitchPair() + def swPair = switchPairs.singleSwitch().random() def flow = flowHelperV2.randomFlow(swPair) flowHelperV2.addFlow(flow) diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowCrudV1Spec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowCrudV1Spec.groovy index 549c9b16ade..4df4eba3ed9 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowCrudV1Spec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowCrudV1Spec.groovy @@ -494,7 +494,7 @@ class FlowCrudV1Spec extends HealthCheckSpecification { def "Error is returned if there is no available path to #data.isolatedSwitchType switch"() { given: "A switch that has no connection to other switches" - def isolatedSwitch = topologyHelper.notNeighboringSwitchPair.src + def isolatedSwitch = switchPairs.all().nonNeighbouring().random().src def flow = data.getFlow(isolatedSwitch) topology.getBusyPortsForSwitch(isolatedSwitch).each { port -> antiflap.portDown(isolatedSwitch.dpId, port) diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowDiversitySpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowDiversitySpec.groovy index 162e954812d..3f218fe03dd 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowDiversitySpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowDiversitySpec.groovy @@ -322,7 +322,7 @@ class FlowDiversitySpec extends HealthCheckSpecification { def "Able to get flow paths with correct overlapping segments stats (casual + single-switch flows)"() { given: "Two active not neighboring switches" - def switchPair = topologyHelper.getNotNeighboringSwitchPair() + def switchPair = switchPairs.all().nonNeighbouring().random() and: "Create a casual flow going through these switches" def flow1 = flowHelperV2.randomFlow(switchPair, false) flowHelperV2.addFlow(flow1) diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowLoopSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowLoopSpec.groovy index 10eef1c139d..14f8da80063 100644 --- 
a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowLoopSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowLoopSpec.groovy @@ -356,7 +356,7 @@ class FlowLoopSpec extends HealthCheckSpecification { def "System is able to detect and sync missing flowLoop rules"() { given: "An active flow with created flowLoop on the src switch" - def switchPair = topologyHelper.getNeighboringSwitchPair() + def switchPair = switchPairs.all().neighbouring().random() def flow = flowHelperV2.randomFlow(switchPair) flowHelperV2.addFlow(flow) northboundV2.createFlowLoop(flow.flowId, new FlowLoopPayload(switchPair.src.dpId)) @@ -627,7 +627,7 @@ class FlowLoopSpec extends HealthCheckSpecification { @Tags(LOW_PRIORITY) def "Attempt to create the exact same flowLoop twice just reinstalls the rules"() { given: "An active multi switch flow with created flowLoop on the src switch" - def switchPair = topologyHelper.getNeighboringSwitchPair() + def switchPair = switchPairs.all().neighbouring().random() def flow = flowHelperV2.randomFlow(switchPair) flowHelperV2.addFlow(flow) northboundV2.createFlowLoop(flow.flowId, new FlowLoopPayload(switchPair.src.dpId)) @@ -670,7 +670,7 @@ class FlowLoopSpec extends HealthCheckSpecification { @Tags(LOW_PRIORITY) def "Unable to create flowLoop when a switch is deactivated"() { given: "An active flow" - def switchPair = topologyHelper.getNeighboringSwitchPair() + def switchPair = switchPairs.all().neighbouring().random() def flow = flowHelperV2.randomFlow(switchPair) flowHelperV2.addFlow(flow) @@ -706,7 +706,7 @@ class FlowLoopSpec extends HealthCheckSpecification { @Tags(LOW_PRIORITY) def "Unable to create flowLoop on the src switch when it is already created on the dst switch"() { given: "An active flow with created flowLoop on the src switch" - def switchPair = topologyHelper.getNeighboringSwitchPair() + def switchPair = switchPairs.all().neighbouring().random() def flow = flowHelperV2.randomFlow(switchPair) flowHelperV2.addFlow(flow) northboundV2.createFlowLoop(flow.flowId, new FlowLoopPayload(switchPair.src.dpId)) @@ -726,7 +726,7 @@ class FlowLoopSpec extends HealthCheckSpecification { @Tags(LOW_PRIORITY) def "Unable to create flowLoop on a transit switch"() { given: "An active multi switch flow with transit switch" - def switchPair = topologyHelper.getNotNeighboringSwitchPair() + def switchPair = switchPairs.all().nonNeighbouring().random() def flow = flowHelperV2.randomFlow(switchPair) flowHelperV2.addFlow(flow) def transitSwId = PathHelper.convert(northbound.getFlowPath(flow.flowId))[1].switchId @@ -770,7 +770,7 @@ class FlowLoopSpec extends HealthCheckSpecification { @Tags(LOW_PRIORITY) def "Unable to create flowLoop on a non existent switch"() { given: "An active multi switch flow" - def swP = topologyHelper.getNeighboringSwitchPair() + def swP = switchPairs.all().neighbouring().random() def flow = flowHelperV2.randomFlow(swP) flowHelperV2.addFlow(flow) diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowPingSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowPingSpec.groovy index 77448019958..3cd28637289 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowPingSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowPingSpec.groovy @@ -332,7 +332,7 @@ 
class FlowPingSpec extends HealthCheckSpecification { def "Able to turn on periodic pings on a flow"() { when: "Create a flow with periodic pings turned on" - def endpointSwitches = topologyHelper.notNeighboringSwitchPair + def endpointSwitches = switchPairs.all().nonNeighbouring().random() def flow = flowHelperV2.randomFlow(endpointSwitches).tap { it.periodicPings = true } diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowSyncSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowSyncSpec.groovy index 70fa258ed09..e87f20d8500 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowSyncSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowSyncSpec.groovy @@ -29,7 +29,7 @@ class FlowSyncSpec extends HealthCheckSpecification { @Tags([SMOKE_SWITCHES, SMOKE]) def "Able to synchronize a flow (install missing flow rules, reinstall existing) without rerouting"() { given: "An intermediate-switch flow with deleted rules on src switch" - def switchPair = topologyHelper.getNotNeighboringSwitchPair() + def switchPair = switchPairs.all().nonNeighbouring().random() assumeTrue(switchPair.asBoolean(), "Need a not-neighbouring switch pair for this test") def flow = flowHelperV2.randomFlow(switchPair) diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowValidationNegativeSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowValidationNegativeSpec.groovy index 3aeaa57c063..f10b7416f5a 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowValidationNegativeSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/FlowValidationNegativeSpec.groovy @@ -84,19 +84,19 @@ class FlowValidationNegativeSpec extends HealthCheckSpecification { [flowToBreak, intactFlow].each { it && flowHelperV2.deleteFlow(it.flowId) } where: - flowConfig | switchPair | item | switchNo | flowType - "single switch" | getTopologyHelper().getSingleSwitchPair() | 0 | "single" | "forward" - "single switch" | getTopologyHelper().getSingleSwitchPair() | 0 | "single" | "reverse" - "neighbouring" | getTopologyHelper().getNeighboringSwitchPair() | 0 | "first" | "forward" - "neighbouring" | getTopologyHelper().getNeighboringSwitchPair() | 0 | "first" | "reverse" - "neighbouring" | getTopologyHelper().getNeighboringSwitchPair() | 1 | "last" | "forward" - "neighbouring" | getTopologyHelper().getNeighboringSwitchPair() | 1 | "last" | "reverse" - "transit" | getTopologyHelper().getNotNeighboringSwitchPair() | 0 | "first" | "forward" - "transit" | getTopologyHelper().getNotNeighboringSwitchPair() | 0 | "first" | "reverse" - "transit" | getTopologyHelper().getNotNeighboringSwitchPair() | 1 | "middle" | "forward" - "transit" | getTopologyHelper().getNotNeighboringSwitchPair() | 1 | "middle" | "reverse" - "transit" | getTopologyHelper().getNotNeighboringSwitchPair() | -1 | "last" | "forward" - "transit" | getTopologyHelper().getNotNeighboringSwitchPair() | -1 | "last" | "reverse" + flowConfig | switchPair | item | switchNo | flowType + "single switch" | switchPairs.singleSwitch().random() | 0 | "single" | "forward" + "single switch" | switchPairs.singleSwitch().random() | 0 | "single" | "reverse" + "neighbouring" | 
switchPairs.all().neighbouring().random() | 0 | "first" | "forward" + "neighbouring" | switchPairs.all().neighbouring().random() | 0 | "first" | "reverse" + "neighbouring" | switchPairs.all().neighbouring().random() | 1 | "last" | "forward" + "neighbouring" | switchPairs.all().neighbouring().random() | 1 | "last" | "reverse" + "transit" | switchPairs.all().nonNeighbouring().random() | 0 | "first" | "forward" + "transit" | switchPairs.all().nonNeighbouring().random() | 0 | "first" | "reverse" + "transit" | switchPairs.all().nonNeighbouring().random() | 1 | "middle" | "forward" + "transit" | switchPairs.all().nonNeighbouring().random() | 1 | "middle" | "reverse" + "transit" | switchPairs.all().nonNeighbouring().random() | -1 | "last" | "forward" + "transit" | switchPairs.all().nonNeighbouring().random() | -1 | "last" | "reverse" } def "Unable to #data.description a non-existent flow"() { diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/PartialUpdateSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/PartialUpdateSpec.groovy index 8fa8561ab40..5c72c4731d1 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/PartialUpdateSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/PartialUpdateSpec.groovy @@ -237,7 +237,7 @@ class PartialUpdateSpec extends HealthCheckSpecification { def "Able to do partial update on a single-switch flow"() { given: "A single-switch flow" - def swPair = topologyHelper.singleSwitchPair + def swPair = switchPairs.singleSwitch().random() def flow = flowHelperV2.randomFlow(swPair) flowHelperV2.addFlow(flow) def originalCookies = northbound.getSwitchRules(swPair.src.dpId).flowEntries.findAll { diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/ProtectedPathSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/ProtectedPathSpec.groovy index b1288f251dd..4e2afd1ce7b 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/ProtectedPathSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/ProtectedPathSpec.groovy @@ -1432,7 +1432,7 @@ doesn't have links with enough bandwidth, Failed to find path with requested ban @IterationTag(tags = [LOW_PRIORITY], iterationNameRegex = /unmetered/) def "Unable to create #flowDescription flow with protected path if all alternative paths are unavailable"() { given: "Two active neighboring switches without alt paths" - def switchPair = topologyHelper.getNeighboringSwitchPair() + def switchPair = switchPairs.all().neighbouring().random() List broughtDownPorts = [] switchPair.paths.sort { it.size() }[1..-1].unique { diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/ProtectedPathV1Spec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/ProtectedPathV1Spec.groovy index 05eecfb30c4..e4e8a6cbbf2 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/ProtectedPathV1Spec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/ProtectedPathV1Spec.groovy @@ -217,7 +217,7 @@ class ProtectedPathV1Spec extends HealthCheckSpecification { def "Unable 
to create #flowDescription flow with protected path if all alternative paths are unavailable"() { given: "Two active neighboring switches without alt paths" - def switchPair = topologyHelper.getNeighboringSwitchPair() + def switchPair = switchPairs.all().neighbouring().random() List broughtDownPorts = [] switchPair.paths.sort { it.size() }[1..-1].unique { diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/QinQFlowSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/QinQFlowSpec.groovy index 4ccb3e086cd..cb9bae9708d 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/QinQFlowSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/QinQFlowSpec.groovy @@ -313,7 +313,7 @@ class QinQFlowSpec extends HealthCheckSpecification { [srcVlanId, srcInnerVlanId, dstVlanId, dstInnerVlanId, swPair] << [ [[10, 20, 30, 40], [10, 20, 0, 0]], - getUniqueSwitchPairs(topologyHelper.getAllSingleSwitchPairs()) + getUniqueSwitchPairs(switchPairs.singleSwitch().getSwitchPairs()) ].combinations().collect { it.flatten() } trafficDisclaimer = swPair.src.traffGens.size > 1 ? "" : " !WARN: No traffic check!" } diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/SwapEndpointSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/SwapEndpointSpec.groovy index 2edc89c70e0..40844bd3954 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/SwapEndpointSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/SwapEndpointSpec.groovy @@ -79,7 +79,7 @@ class SwapEndpointSpec extends HealthCheckSpecification { where: data << [ [description: "no vlan vs vlan on the same port on src switch"].tap { - def switchPair = getTopologyHelper().getNotNeighboringSwitchPair() + def switchPair = getTopologyHelper().getAllSwitchPairs().nonNeighbouring().random() def flow1 = getFlowHelper().randomFlow(switchPair) flow1.source.portNumber = getFreePort(switchPair.src, [switchPair.dst]) flow1.source.vlanId = 0 @@ -92,7 +92,7 @@ class SwapEndpointSpec extends HealthCheckSpecification { getFlowHelper().toFlowEndpointV2(flow2.destination)) }, [description: "same port, swap vlans on dst switch + third idle novlan flow on that port"].tap { - def switchPair = getTopologyHelper().getNotNeighboringSwitchPair() + def switchPair = getTopologyHelper().getAllSwitchPairs().nonNeighbouring().random() def flow1 = getFlowHelper().randomFlow(switchPair) def flow2 = getFlowHelper().randomFlow(switchPair, false, [flow1]) flow1.destination.portNumber = getFreePort(switchPair.dst, [switchPair.src]) @@ -108,7 +108,7 @@ class SwapEndpointSpec extends HealthCheckSpecification { getFlowHelper().toFlowEndpointV2(flow1.destination)) }, [description: "vlan on src1 <-> vlan on dst2, same port numbers"].tap { - def switchPair = getTopologyHelper().getNotNeighboringSwitchPair() + def switchPair = getTopologyHelper().getAllSwitchPairs().nonNeighbouring().random() def flow1 = getFlowHelper().randomFlow(switchPair) def flow2 = getFlowHelper().randomFlow(switchPair, false, [flow1]) flow1.source.portNumber = getFreePort(switchPair.src, [switchPair.dst]) @@ -122,7 +122,7 @@ class SwapEndpointSpec extends HealthCheckSpecification { 
getFlowHelper().toFlowEndpointV2(flow2.destination).tap { it.vlanId = flow1.source.vlanId }) }, [description: "port on dst1 <-> port on src2, vlans are equal"].tap { - def switchPair = getTopologyHelper().getNotNeighboringSwitchPair() + def switchPair = getTopologyHelper().getAllSwitchPairs().nonNeighbouring().random() def flow1 = getFlowHelper().randomFlow(switchPair, false) def flow2 = getFlowHelper().randomFlow(switchPair, false, [flow1]) flow1.destination.portNumber = getFreePort(switchPair.dst, [switchPair.src], @@ -141,7 +141,7 @@ class SwapEndpointSpec extends HealthCheckSpecification { getFlowHelper().toFlowEndpointV2(flow2.destination)) }, [description: "switch on src1 <-> switch on dst2, other params random"].tap { - def switchPair = getTopologyHelper().getNotNeighboringSwitchPair() + def switchPair = getTopologyHelper().getAllSwitchPairs().nonNeighbouring().random() def flow1 = getFlowHelper().randomFlow(switchPair) def flow2 = getFlowHelper().randomFlow(switchPair, false, [flow1]) flow1.source.portNumber = getFreePort(switchPair.src, [switchPair.dst]) @@ -157,7 +157,7 @@ class SwapEndpointSpec extends HealthCheckSpecification { .tap { it.switchId = flow1.source.datapath }) }, [description: "both endpoints swap, same switches"].tap { - def switchPair = getTopologyHelper().getNotNeighboringSwitchPair() + def switchPair = getTopologyHelper().getAllSwitchPairs().nonNeighbouring().random() def flow1 = getFlowHelper().randomFlow(switchPair) def flow2 = getFlowHelper().randomFlow(switchPair, false, [flow1]) flow1.source.portNumber = getFreePort(switchPair.src, [switchPair.dst]) @@ -173,7 +173,7 @@ class SwapEndpointSpec extends HealthCheckSpecification { getFlowHelper().toFlowEndpointV2(flow1.destination)) }, [description: "endpoints src1 <-> dst2, same switches"].tap { - def switchPair = getTopologyHelper().getNotNeighboringSwitchPair() + def switchPair = getTopologyHelper().getAllSwitchPairs().nonNeighbouring().random() def flow1 = getFlowHelper().randomFlow(switchPair) def flow2 = getFlowHelper().randomFlow(switchPair, false, [flow1]) flow1.source.portNumber = getFreePort(switchPair.src, [switchPair.dst]) @@ -424,7 +424,7 @@ switches"() { endpointsPart << ["vlans", "ports", "switches"] proprtyName << ["vlanId", "portNumber", "datapath"] description = "src1 <-> dst2, dst1 <-> src2" - flow1SwitchPair = getTopologyHelper().getNotNeighboringSwitchPair() + flow1SwitchPair = getTopologyHelper().getAllSwitchPairs().nonNeighbouring().random() flow2SwitchPair = getDifferentNotNeighboringSwitchPair(flow1SwitchPair) flow1 = getFirstFlow(flow1SwitchPair, flow2SwitchPair) flow2 = getSecondFlow(flow1SwitchPair, flow2SwitchPair, flow1) @@ -502,7 +502,7 @@ switches"() { it.flow2Src = it.flow1.destination it.flow2Dst = it.flow2.destination }].collect { iterationData -> - def flow1SwitchPair = getTopologyHelper().getNotNeighboringSwitchPair() + def flow1SwitchPair = getTopologyHelper().getAllSwitchPairs().nonNeighbouring().random() def flow2SwitchPair = getDifferentNotNeighboringSwitchPair(flow1SwitchPair) def flow1 = getFirstFlow(flow1SwitchPair, flow2SwitchPair) [flow1SwitchPair: flow1SwitchPair, flow2SwitchPair: flow2SwitchPair, flow1: flow1].tap(iterationData) @@ -515,7 +515,7 @@ switches"() { def "Unable to swap endpoints for existing flow and non-existing flow"() { given: "An active flow" - def switchPair = topologyHelper.getNeighboringSwitchPair() + def switchPair = switchPairs.all().neighbouring().random() def flow1 = flowHelper.randomFlow(switchPair) def flow2 = 
flowHelper.randomFlow(switchPair) flowHelper.addFlow(flow1) @@ -649,7 +649,7 @@ switches"() { flow2Src = changePropertyValue(flow2.source, "portNumber", flow1.destination.portNumber) flow2Dst = flow2.destination }].collect { iterationData -> - def flow1SwitchPair = getTopologyHelper().getNotNeighboringSwitchPair() + def flow1SwitchPair = getTopologyHelper().getAllSwitchPairs().nonNeighbouring().random() def flow2SwitchPair = getDifferentNotNeighboringSwitchPair(flow1SwitchPair) def flow1 = getFirstFlow(flow1SwitchPair, flow2SwitchPair) [flow1SwitchPair: flow1SwitchPair, flow2SwitchPair: flow2SwitchPair, flow1: flow1].tap(iterationData) @@ -702,7 +702,7 @@ switches"() { flow2Src = changePropertyValue(flow2.source, "portNumber", flow1.source.portNumber) flow2Dst = flow1.destination }].collect { iterationData -> - def flow1SwitchPair = getTopologyHelper().getNotNeighboringSwitchPair() + def flow1SwitchPair = getTopologyHelper().getAllSwitchPairs().nonNeighbouring().random() def flow2SwitchPair = getDifferentNotNeighboringSwitchPair(flow1SwitchPair) def flow1 = getFirstFlow(flow1SwitchPair, flow2SwitchPair) def flow2 = getSecondFlow(flow1SwitchPair, flow2SwitchPair, flow1) @@ -1168,7 +1168,7 @@ switches"() { flow2Src = flow1.destination flow2Dst = flow2.destination }].collect { iterationData -> - def flow1SwitchPair = getTopologyHelper().getNotNeighboringSwitchPair() + def flow1SwitchPair = getTopologyHelper().getAllSwitchPairs().nonNeighbouring().random() def flow2SwitchPair = getDifferentNotNeighboringSwitchPair(flow1SwitchPair) def flow1 = getFlowHelper().randomFlow(flow1SwitchPair) def flow2 = getFlowHelper().randomFlow(flow2SwitchPair, false, [flow1]).tap { diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/yflows/YFlowRerouteSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/yflows/YFlowRerouteSpec.groovy index 5200a2254e1..fcdb5010309 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/yflows/YFlowRerouteSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/flows/yflows/YFlowRerouteSpec.groovy @@ -180,9 +180,9 @@ class YFlowRerouteSpec extends HealthCheckSpecification { def "Y-Flow reroute has been executed when more preferable path is available for both sub-flows (shared path cost was changed)" () { given: "The appropriate switches have been collected" //y-flow with shared path is created when shared_ep+ep1->neighbour && ep1+ep2->neighbour && shared_ep+ep2->not neighbour - def pairSharedEpAndEp1 = topologyHelper.getAllSwitchPairs().neighbouring().first() - def sharedEpNeighbouringSwitches = topologyHelper.getAllSwitchPairs().neighbouring().includeSwitch(pairSharedEpAndEp1.src).collectSwitches() - def pairEp1AndEp2 = topologyHelper.getAllSwitchPairs().neighbouring().excludePairs([pairSharedEpAndEp1]) + def pairSharedEpAndEp1 = switchPairs.all().neighbouring().first() + def sharedEpNeighbouringSwitches = switchPairs.all().neighbouring().includeSwitch(pairSharedEpAndEp1.src).collectSwitches() + def pairEp1AndEp2 = switchPairs.all().neighbouring().excludePairs([pairSharedEpAndEp1]) .includeSwitch(pairSharedEpAndEp1.dst).excludeSwitches(sharedEpNeighbouringSwitches).first() def swT = new SwitchTriplet(shared: pairSharedEpAndEp1.src, ep1: pairEp1AndEp2.src, ep2: pairEp1AndEp2.dst) @@ -231,9 +231,9 @@ class YFlowRerouteSpec extends HealthCheckSpecification { def "Y-Flow reroute has been 
executed when more preferable path is available for one of the sub-flows" () { given: "The appropriate switches have been collected" //y-flow with shared path is created when shared_ep+ep1->neighbour && ep1+ep2->neighbour && shared_ep+ep2->not neighbour - def pairSharedEpAndEp1 = topologyHelper.getAllSwitchPairs().neighbouring().first() - def sharedEpNeighbouringSwitches = topologyHelper.getAllSwitchPairs().neighbouring().includeSwitch(pairSharedEpAndEp1.src).collectSwitches() - def pairEp1AndEp2 = topologyHelper.getAllSwitchPairs().neighbouring().excludePairs([pairSharedEpAndEp1]) + def pairSharedEpAndEp1 = switchPairs.all().neighbouring().first() + def sharedEpNeighbouringSwitches = switchPairs.all().neighbouring().includeSwitch(pairSharedEpAndEp1.src).collectSwitches() + def pairEp1AndEp2 = switchPairs.all().neighbouring().excludePairs([pairSharedEpAndEp1]) .includeSwitch(pairSharedEpAndEp1.dst).excludeSwitches(sharedEpNeighbouringSwitches).first() def swT = new SwitchTriplet(shared: pairSharedEpAndEp1.src, ep1: pairEp1AndEp2.src, ep2: pairEp1AndEp2.dst) diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/links/LinkSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/links/LinkSpec.groovy index a3c7cfd0fe3..24000d36204 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/links/LinkSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/links/LinkSpec.groovy @@ -132,7 +132,7 @@ class LinkSpec extends HealthCheckSpecification { @Tags(SMOKE) def "Get all flows (UP/DOWN) going through a particular link"() { given: "Two active not neighboring switches" - def switchPair = topologyHelper.getNotNeighboringSwitchPair() + def switchPair = switchPairs.all().nonNeighbouring().random() and: "Forward flow from source switch to destination switch" def flow1 = flowHelperV2.randomFlow(switchPair).tap { it.pinned = true } @@ -717,7 +717,7 @@ class LinkSpec extends HealthCheckSpecification { def "Unable to delete inactive link with flowPath"() { given: "An inactive link with flow on it" - def switchPair = topologyHelper.getNeighboringSwitchPair() + def switchPair = switchPairs.all().neighbouring().random() def flow = flowHelperV2.randomFlow(switchPair) flow.pinned = true flowHelperV2.addFlow(flow) diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/network/PathCheckSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/network/PathCheckSpec.groovy index befc23885f6..b3e159dfa61 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/network/PathCheckSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/network/PathCheckSpec.groovy @@ -22,7 +22,7 @@ class PathCheckSpec extends HealthCheckSpecification { @Tags(SMOKE) def "No path validation errors for valid path without limitations"() { given: "Path for non-neighbouring switches" - def path = topologyHelper.getAllSwitchPairs().nonNeighbouring().random() + def path = switchPairs.all().nonNeighbouring().random() .getPaths().sort { it.size() }.first() when: "Check the path without limitations" @@ -39,7 +39,7 @@ class PathCheckSpec extends HealthCheckSpecification { @Tags(SMOKE) def "Path check errors returned for each segment and each type of problem"() { given: "Path of at least three switches" - def 
switchPair = topologyHelper.getAllSwitchPairs().nonNeighbouring().random() + def switchPair = switchPairs.all().nonNeighbouring().random() def path = switchPair.getPaths() .sort { it.size() } .first() @@ -69,7 +69,7 @@ class PathCheckSpec extends HealthCheckSpecification { @Tags(LOW_PRIORITY) def "Latency check errors are returned for the whole existing flow"() { given: "Path of at least three switches" - def switchPair = topologyHelper.getAllSwitchPairs().nonNeighbouring().random() + def switchPair = switchPairs.all().nonNeighbouring().random() def path = switchPair.getPaths() .sort { it.size() } .first() @@ -100,7 +100,7 @@ class PathCheckSpec extends HealthCheckSpecification { @Tags(LOW_PRIORITY) def "Path intersection check errors are returned for each segment of existing flow"() { given: "Flow has been created successfully" - def switchPair = topologyHelper.getAllSwitchPairs().nonNeighbouring().first() + def switchPair = switchPairs.all().nonNeighbouring().first() def flow = flowHelperV2.addFlow(flowHelperV2.randomFlow(switchPair, false)) def flowPathDetails = northbound.getFlowPath(flow.flowId) @@ -133,8 +133,8 @@ class PathCheckSpec extends HealthCheckSpecification { @Tags(LOW_PRIORITY) def "Path intersection check errors are returned for each segment of each flow in diverse group"() { given: "List of required neighbouring switches has been collected" - def firstSwitchPair = topologyHelper.getAllSwitchPairs().neighbouring().random() - def secondSwitchPair = topologyHelper.getAllSwitchPairs().neighbouring().excludePairs([firstSwitchPair]) + def firstSwitchPair = switchPairs.all().neighbouring().random() + def secondSwitchPair = switchPairs.all().neighbouring().excludePairs([firstSwitchPair]) .includeSwitch(firstSwitchPair.dst).random() and:"Two flows in one diverse group have been created" @@ -147,7 +147,7 @@ class PathCheckSpec extends HealthCheckSpecification { def flow2Path = northbound.getFlowPath(flow2.flowId) when: "Check potential path that has intersection ONLY with one flow from diverse group" - LinkedList pathToCheck = topologyHelper.getAllSwitchPairs().neighbouring().excludePairs([firstSwitchPair, secondSwitchPair]) + LinkedList pathToCheck = switchPairs.all().neighbouring().excludePairs([firstSwitchPair, secondSwitchPair]) .includeSwitch(firstSwitchPair.src).random().paths.first() if(pathToCheck.last().switchId != firstSwitchPair.src.dpId) { diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/network/PathsSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/network/PathsSpec.groovy index 0c30e7ef382..3aadd623b6f 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/network/PathsSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/network/PathsSpec.groovy @@ -31,7 +31,7 @@ class PathsSpec extends HealthCheckSpecification { @Tags(SMOKE) def "Get paths between not neighboring switches"() { given: "Two active not neighboring switches" - def switchPair = topologyHelper.getAllSwitchPairs() + def switchPair = switchPairs.all() .nonNeighbouring() .random() @@ -63,7 +63,7 @@ class PathsSpec extends HealthCheckSpecification { @Tags(LOW_PRIORITY) def "Able to get paths between switches for the LATENCY strategy"() { given: "Two active not neighboring switches" - def switchPair = topologyHelper.getAllSwitchPairs() + def switchPair = switchPairs.all() .nonNeighbouring() .random() @@ -106,7 +106,7 @@ 
class PathsSpec extends HealthCheckSpecification { def "Unable to get paths with max_latency strategy without max latency parameter"() { given: "Two active not neighboring switches" - def switchPair = topologyHelper.getAllSwitchPairs() + def switchPair = switchPairs.all() .nonNeighbouring() .random() @@ -125,7 +125,7 @@ class PathsSpec extends HealthCheckSpecification { @Tags(LOW_PRIORITY) def "Unable to get a path for a 'vxlan' flowEncapsulationType when switches do not support it"() { given: "Two active not supported 'vxlan' flowEncapsulationType switches" - def switchPair = topologyHelper.getAllSwitchPairs().random() + def switchPair = switchPairs.all().random() Map initProps = [switchPair.src, switchPair.dst].collectEntries { [(it): switchHelper.getCachedSwProps(it.dpId)] } @@ -158,7 +158,7 @@ class PathsSpec extends HealthCheckSpecification { @Unroll def "Protected path is #isIncludedString included into path list if #isIncludedString requested"() { given: "Two switches with potential protected path" - def switchPair = topologyHelper.getAllSwitchPairs() + def switchPair = switchPairs.all() .withAtLeastNNonOverlappingPaths(2) .random() @@ -177,7 +177,7 @@ class PathsSpec extends HealthCheckSpecification { @Tags(LOW_PRIORITY) def "Protected path is null if it doesn't match criteria"() { given: "Two non-neighbouring switches with the one path shorter than others" - def switchPair = topologyHelper.getAllSwitchPairs() + def switchPair = switchPairs.all() .nonNeighbouring() .withShortestPathShorterThanOthers() .sortedByShortestPathLengthAscending() diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/switches/LagPortSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/switches/LagPortSpec.groovy index edabcf36431..970f894ce0d 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/switches/LagPortSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/switches/LagPortSpec.groovy @@ -202,12 +202,8 @@ class LagPortSpec extends HealthCheckSpecification { def "Able to create a singleSwitchFlow on a LAG port"() { given: "A switch with two traffgens and one LAG port" and: "A flow on the LAG port" - def allTraffGenSwitchIds = topology.activeTraffGens*.switchConnected*.dpId - assumeTrue(allTraffGenSwitchIds.size() > 1, "Unable to find active traffgen") - def swPair = topologyHelper.getAllSingleSwitchPairs().find { - it.src.dpId in allTraffGenSwitchIds && it.src.traffGens.size() > 1 - } - assumeTrue(swPair.asBoolean(), "Unable to find required switch in topology") + def swPair = switchPairs.singleSwitch() + .withAtLeastNTraffgensOnSource(2).random() def traffgenSrcSwPort = swPair.src.traffGens[0].switchPort def traffgenDstSwPort = swPair.src.traffGens[1].switchPort def payload = new LagPortRequest(portNumbers: [traffgenSrcSwPort]) @@ -337,7 +333,7 @@ on switch $sw.dpId is used as part of LAG port $lagPort/).matches(exc) def "Unable to create a LAG port with port which is used as mirrorPort"() { given: "A flow with mirrorPoint" - def swP = topologyHelper.getNeighboringSwitchPair() + def swP = switchPairs.all().neighbouring().random() def flow = flowHelperV2.randomFlow(swP, false) flowHelperV2.addFlow(flow) diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/switches/SwitchActivationSpec.groovy 
b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/switches/SwitchActivationSpec.groovy index c21a23cc20c..e6c8ce076c7 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/switches/SwitchActivationSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/switches/SwitchActivationSpec.groovy @@ -44,7 +44,7 @@ class SwitchActivationSpec extends HealthCheckSpecification { @Tags([SMOKE, SMOKE_SWITCHES, LOCKKEEPER]) def "Missing flow rules/meters are installed on a new switch before connecting to the controller"() { given: "A switch with missing flow rules/meters and not connected to the controller" - def switchPair = topologyHelper.getNeighboringSwitchPair() + def switchPair = switchPairs.all().neighbouring().random() def flow = flowHelperV2.randomFlow(switchPair) flowHelperV2.addFlow(flow) diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/switches/SwitchSyncSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/switches/SwitchSyncSpec.groovy index 58a7f7e7b5a..5acd9c545d9 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/switches/SwitchSyncSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/switches/SwitchSyncSpec.groovy @@ -169,7 +169,7 @@ class SwitchSyncSpec extends HealthCheckSpecification { def "Able to synchronize #switchKind switch (delete excess rules and meters)"() { given: "Flow with intermediate switches" - def switchPair = topologyHelper.getAllSwitchPairs().nonNeighbouring().random() + def switchPair = switchPairs.all().nonNeighbouring().random() def flow = flowHelperV2.randomFlow(switchPair) flowHelperV2.addFlow(flow) def switchId = getSwitch(flow) diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/xresilience/ContentionSpec.groovy b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/xresilience/ContentionSpec.groovy index 056b8fa6610..7db8596744a 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/xresilience/ContentionSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/xresilience/ContentionSpec.groovy @@ -22,7 +22,7 @@ class ContentionSpec extends BaseSpecification { when: "Create the same flow in parallel multiple times" def flowsAmount = 20 def group = new DefaultPGroup(flowsAmount) - def flow = flowHelperV2.randomFlow(topologyHelper.notNeighboringSwitchPair) + def flow = flowHelperV2.randomFlow(switchPairs.all().nonNeighbouring().random()) def tasks = (1..flowsAmount).collect { group.task { flowHelperV2.addFlow(flow) } } @@ -88,7 +88,7 @@ class ContentionSpec extends BaseSpecification { def "Reroute can be simultaneously performed with sync rules requests, removeExcess=#removeExcess"() { given: "A flow with reroute potential" - def switches = topologyHelper.getNotNeighboringSwitchPair() + def switches = switchPairs.all().nonNeighbouring().random() def flow = flowHelperV2.randomFlow(switches) flowHelperV2.addFlow(flow) def currentPath = pathHelper.convert(northbound.getFlowPath(flow.flowId)) diff --git a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/xresilience/StormLcmSpec.groovy 
b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/xresilience/StormLcmSpec.groovy index 98abb763851..bfc8e524b93 100644 --- a/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/xresilience/StormLcmSpec.groovy +++ b/src-java/testing/functional-tests/src/test/groovy/org/openkilda/functionaltests/spec/xresilience/StormLcmSpec.groovy @@ -58,7 +58,7 @@ class StormLcmSpec extends HealthCheckSpecification { List flows = [] def flowsAmount = topology.activeSwitches.size() * 3 flowsAmount.times { - def flow = flowHelperV2.randomFlow(*topologyHelper.getRandomSwitchPair(false), false, flows) + def flow = flowHelperV2.randomFlow(switchPairs.all().random(), false, flows) flow.maximumBandwidth = 500000 flowHelperV2.addFlow(flow) flows << flow diff --git a/src-java/testing/performance-tests/src/test/groovy/org/openkilda/performancetests/spec/EnduranceSpec.groovy b/src-java/testing/performance-tests/src/test/groovy/org/openkilda/performancetests/spec/EnduranceSpec.groovy index 906b58612d4..7508382da3f 100644 --- a/src-java/testing/performance-tests/src/test/groovy/org/openkilda/performancetests/spec/EnduranceSpec.groovy +++ b/src-java/testing/performance-tests/src/test/groovy/org/openkilda/performancetests/spec/EnduranceSpec.groovy @@ -202,7 +202,7 @@ idle, mass manual reroute. Step repeats pre-defined number of times" def createFlow(waitForRules = false, boolean protectedPath = false) { Wrappers.silent { - def flow = flowHelper.randomFlow(*topoHelper.getRandomSwitchPair(), false, flows) + def flow = flowHelper.randomFlow(*topoHelper.getAllSwitchPairs().random(), false, flows) flow.allocateProtectedPath = protectedPath log.info "creating flow $flow.id" waitForRules ? flowHelper.addFlow(flow) : northbound.addFlow(flow) diff --git a/src-java/testing/performance-tests/src/test/groovy/org/openkilda/performancetests/spec/EnduranceV2Spec.groovy b/src-java/testing/performance-tests/src/test/groovy/org/openkilda/performancetests/spec/EnduranceV2Spec.groovy index 390ceaad17d..01a08e4d149 100644 --- a/src-java/testing/performance-tests/src/test/groovy/org/openkilda/performancetests/spec/EnduranceV2Spec.groovy +++ b/src-java/testing/performance-tests/src/test/groovy/org/openkilda/performancetests/spec/EnduranceV2Spec.groovy @@ -166,7 +166,8 @@ idle, mass manual reroute, isl break. 
Step repeats pre-defined number of times" ] //define payload generating method that will be called each time flow creation is issued makeFlowPayload = { - def flow = flowHelperV2.randomFlow(*topoHelper.getRandomSwitchPair(), false, flows) + def flow = flowHelperV2.randomFlow(*topoHelper.getAllSwitchPairs().random(), + false, flows) flow.maximumBandwidth = 200000 flow.allocateProtectedPath = r.nextBoolean() return flow

From 5ed5ddd6564fee80538af68f366afaf70a756c70 Mon Sep 17 00:00:00 2001
From: Pablo Murillo
Date: Thu, 14 Dec 2023 10:30:49 +0100
Subject: [PATCH 27/27] Update CHANGELOG.md

---
 CHANGELOG.md | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4688ca9a17f..6b763cb1d78 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,27 @@
 # Changelog
+## v1.150.0 (19/12/2023)
+
+### Features:
+- [#4931](https://github.com/telstra/open-kilda/pull/4931) Tool for dump/restore OpenTSDB data
+
+### Bug Fixes:
+- [#5513](https://github.com/telstra/open-kilda/pull/5513) #5420: [TEST] Ignore failing WB5164 mirror check (Issue: [#5420](https://github.com/telstra/open-kilda/issues/5420)) [**tests**]
+- [#5501](https://github.com/telstra/open-kilda/pull/5501) Adjust ordering of HA-flow history actions. (Issue: [#5366](https://github.com/telstra/open-kilda/issues/5366))
+
+### Improvements:
+- [#5507](https://github.com/telstra/open-kilda/pull/5507) [TEST]: #5504: Storm: Updating network topology manipulation (Issue: [#5504](https://github.com/telstra/open-kilda/issues/5504)) [**tests**]
+- [#5508](https://github.com/telstra/open-kilda/pull/5508) #5390: [TEST] Fixed tests after changes in history API pt.2 (Issues: [#5390](https://github.com/telstra/open-kilda/issues/5390) [#5390](https://github.com/telstra/open-kilda/issues/5390)) [**tests**]
+- [#5491](https://github.com/telstra/open-kilda/pull/5491) Remove the deprecated authentication plugin for the MySQL container. (Issue: [#5460](https://github.com/telstra/open-kilda/issues/5460)) [**configuration**]
+- [#5493](https://github.com/telstra/open-kilda/pull/5493) [TEST] Refactoring the way to choose switch pairs for test [**tests**]
+
+
+For the complete list of changes, check out [the commit log](https://github.com/telstra/open-kilda/compare/v1.149.0...v1.150.0).
+
+### Affected Components:
+history, otsdb
+
+---
+
 ## v1.149.0 (07/12/2023)

 ### Bug Fixes:
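
Editor's note: the test diffs above (PR #5493) replace the old `topologyHelper.get*SwitchPair()` helpers with the fluent `switchPairs` builder. The sketch below is an illustration only, not part of the patch; it just collects the call chains that appear in the changed specs. It assumes, as the specs above do, that a `switchPairs` provider is available inside a functional-test specification (e.g. a `HealthCheckSpecification` subclass); none of these names are introduced here.

```groovy
// Illustrative fragment (assumed to run inside a spec with an injected `switchPairs` provider).
// Each chain below is taken verbatim from the diffs above; the comments map it to the old helper it replaces.

def neighbouringPair    = switchPairs.all().neighbouring().random()     // was topologyHelper.getNeighboringSwitchPair()
def nonNeighbouringPair = switchPairs.all().nonNeighbouring().random()  // was topologyHelper.getNotNeighboringSwitchPair()
def singleSwitchPair    = switchPairs.singleSwitch().random()           // was topologyHelper.getSingleSwitchPair()
def anyPair             = switchPairs.all().random()                    // was topologyHelper.getRandomSwitchPair()

// Filters can be chained before the terminal pick (random(), first(), or getSwitchPairs() for the full list):
def pairWithAltPaths = switchPairs.all().withAtLeastNNonOverlappingPaths(2).random()
def relatedPair = switchPairs.all().neighbouring()
        .excludePairs([neighbouringPair])
        .includeSwitch(neighbouringPair.dst)
        .random()
```

The builder reads as a declarative filter chain ending in a single pick, which is why the specs above can drop the assorted `topologyHelper` getters and the manual `assumeTrue(...)` guards in favour of one entry point.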