Skip to content

Commit

Permalink
CA-379329: check for missing iSCSI sessions and reconnect
Browse files Browse the repository at this point in the history
If, when an iSCSI SR is attached, some of the paths are not reachable
they will not be subsequently connected should the connectivity issues
be resolved, without detaching the SR or rebooting the host. Neither
of these options give a good experience for users of the software. Add
a systemd timer which will, periodically (default 10 minutes), ask
each SR to do SR specific "health" checks. Currently only a checker
for the iSCSI sessions is present but more can be added to this
pattern in the future as needs arise.

Signed-off-by: Mark Syms <[email protected]>
  • Loading branch information
MarkSymsCtx committed Jul 26, 2023
1 parent fdca39c commit 8fea0eb
Show file tree
Hide file tree
Showing 9 changed files with 520 additions and 232 deletions.
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ SM_LIBS += pluginutil
SM_LIBS += fcoelib
SM_LIBS += constants
SM_LIBS += cbtutil
SM_LIBS += sr_health_check

UDEV_RULES = 65-multipath 55-xs-mpath-scsidev 57-usb 58-xapi
MPATH_DAEMON = sm-multipath
Expand Down Expand Up @@ -164,6 +165,10 @@ install: precheck
$(SM_STAGING)/$(SYSTEMD_SERVICE_DIR)
install -m 644 systemd/storage-init.service \
$(SM_STAGING)/$(SYSTEMD_SERVICE_DIR)
install -m 644 systemd/sr_health_check.service \
$(SM_STAGING)/$(SYSTEMD_SERVICE_DIR)
install -m 644 systemd/sr_health_check.timer \
$(SM_STAGING)/$(SYSTEMD_SERVICE_DIR)
for i in $(UDEV_RULES); do \
install -m 644 udev/$$i.rules \
$(SM_STAGING)$(UDEV_RULES_DIR); done
Expand Down
2 changes: 1 addition & 1 deletion drivers/BaseISCSI.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def load(self, sr_uuid):
self.default_vdi_visibility = False

# Required parameters
if 'target' not in self.dconf or not self.dconf['target']:
if 'target' not in self.dconf or not self.dconf['target']:
raise xs_errors.XenError('ConfigTargetMissing')

# we are no longer putting hconf in the xml.
Expand Down
472 changes: 241 additions & 231 deletions drivers/LVHDoISCSISR.py

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions drivers/SR.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,10 @@ def load(self, sr_uuid):
"""Post-init hook"""
pass

def check_sr(self, sr_uuid):
"""Hook to check SR health"""
pass

def vdi(self, uuid):
"""Return VDI object owned by this repository"""
if uuid not in self.vdis:
Expand Down
60 changes: 60 additions & 0 deletions drivers/sr_health_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/python3

# Copyright (C) Cloud Software Group, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; version 2.1 only.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""
Health check for SR, to be triggered periodically by a systemd timer. What is checked is
SR implementation type dependent.
"""

import SR
import util


def main():
"""
For all locally plugged SRs check that they are healthy
"""
try:
session = util.get_localAPI_session()
except SR.SROSError:
util.SMlog("Unable to open local XAPI session", priority=util.LOG_ERR)
return

localhost = util.get_localhost_ref(session)

sm_types = [x['type'] for x in session.xenapi.SM.get_all_records_where(
'field "required_api_version" = "1.0"').values()]
for sm_type in sm_types:
srs = session.xenapi.SR.get_all_records_where(
f'field "type" = "{sm_type}"')
for sr in srs:
pbds = session.xenapi.PBD.get_all_records_where(
f'field "SR" = "{sr}" and field "host" = "{localhost}"')
if not pbds:
continue

pbd_ref, pbd = pbds.popitem()
if not pbd['currently_attached']:
continue

sr_uuid = srs[sr]['uuid']
sr_obj = SR.SR.from_uuid(session, sr_uuid)
sr_obj.check_sr(sr_uuid)


if __name__ == "__main__":
main()
8 changes: 8 additions & 0 deletions systemd/sr_health_check.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[Unit]
Description=Healthy Check service for Storage Repositories
Wants=xapi-init-complete.target

[Service]
Type=oneshot
ExecStart=/opt/xensource/sm/sr_health_check.py

13 changes: 13 additions & 0 deletions systemd/sr_health_check.timer
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[Unit]
Description=Period SR health check

[Timer]
# Jitter it a bit
RandomizedDelaySec=30
# Run 10 minutes after first activated...
OnActiveSec=600
# ...and at 10-minute intervals thereafter
OnUnitInactiveSec=600

[Install]
WantedBy=timers.target
95 changes: 95 additions & 0 deletions tests/test_LVHDoISCSISR.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
import unittest
import unittest.mock as mock

from uuid import uuid4

import SR
import LVHDoISCSISR
from BaseISCSI import BaseISCSISR
import SRCommand
import util
import xs_errors

import testlib
from test_ISCSISR import NonInitingISCSISR

TEST_SR_UUID = 'test_uuid'


class RandomError(Exception):
pass
Expand Down Expand Up @@ -114,3 +121,91 @@ def test_1st_try_block_raise_RandomError(
str(cm.exception),
'General backend error [opterr=Raise RandomError]'
)


class TestLVHDoISCSISR(unittest.TestCase):

def setUp(self):
util_patcher = mock.patch('LVHDoISCSISR.util')
self.mock_util = util_patcher.start()
self.mock_util.sessions_less_than_targets = util.sessions_less_than_targets
baseiscsi_patcher = mock.patch('LVHDoISCSISR.BaseISCSI.BaseISCSISR')
patched_baseiscsi = baseiscsi_patcher.start()
self.mock_baseiscsi = mock.create_autospec(BaseISCSISR)
patched_baseiscsi.return_value = self.mock_baseiscsi
self.mock_session = mock.MagicMock()
xenapi_patcher = mock.patch('SR.XenAPI')
mock_xenapi = xenapi_patcher.start()
mock_xenapi.xapi_local.return_value = self.mock_session

copy_patcher = mock.patch('LVHDoISCSISR.SR.copy.deepcopy')
self.mock_copy = copy_patcher.start()

def deepcopy(to_copy):
return to_copy

self.mock_copy.side_effect = deepcopy

lock_patcher = mock.patch('LVHDSR.Lock')
self.mock_lock = lock_patcher.start()

self.addCleanup(mock.patch.stopall)

dummy_cmd = mock.create_autospec(SRCommand)
dummy_cmd.dconf = {
'SCSIid': '3600a098038313577792450384a4a6275',
'multihomelist': 'tgt1:3260,tgt2:3260',
'target': "10.70.89.34",
'targetIQN': 'iqn.2009-01.example.test:iscsi085e938a'
}
dummy_cmd.params = {
'command': 'nop',
'session_ref': 'test_session',
'host_ref': 'test_host',
'sr_ref': 'sr_ref'
}
dummy_cmd.cmd = None

self.sr_uuid = str(uuid4())
self.subject = LVHDoISCSISR.LVHDoISCSISR(
dummy_cmd, self.sr_uuid)

def test_check_sr_pbd_not_found(self):
# Arrange
self.mock_util.find_my_pbd.return_value = None

# Act
self.subject.check_sr(TEST_SR_UUID)

# Assert
self.mock_util.find_my_pbd.assert_called_with(
self.mock_session, 'test_host', 'sr_ref')

def test_check_sr_correct_sessions_count(self):
# Arrange
self.mock_util.find_my_pbd.return_value = 'my_pbd'
self.mock_session.xenapi.PBD.get_other_config.return_value = {
'iscsi_sessions': 2
}

# Act
self.subject.check_sr(TEST_SR_UUID)

# Assert
self.mock_session.xenapi.PBD.get_other_config.assert_called_with('my_pbd')

def test_check_sr_not_enough_sessions(self):
# Arrange
self.mock_util.find_my_pbd.return_value = 'my_pbd'
self.mock_session.xenapi.PBD.get_other_config.return_value = {
'iscsi_sessions': 1
}


# Act
self.subject.check_sr(TEST_SR_UUID)

# Assert
self.mock_baseiscsi.attach.assert_called_with(
TEST_SR_UUID
)
93 changes: 93 additions & 0 deletions tests/test_sr_health_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import unittest
import unittest.mock as mock

import sr_health_check
from SR import SR

TEST_HOST = 'test_host'

SR_UUID = 'sr uuid'


class TestSrHealthCheck(unittest.TestCase):

def setUp(self):
util_patcher = mock.patch('sr_health_check.util')
self.mock_util = util_patcher.start()
self.mock_session = mock.MagicMock()
self.mock_util.get_localAPI_session.return_value = self.mock_session
sr_patcher = mock.patch('sr_health_check.SR.SR', autospec=True)
self.mock_sr = sr_patcher.start()

self.addCleanup(mock.patch.stopall)

def expect_good_sr_record(self):
self.mock_session.xenapi.SR.get_all_records_where.return_value = {
"iscsi_ref": {'uuid': SR_UUID, 'host': TEST_HOST}
}

def expect_good_localhost(self):
self.mock_util.get_localhost_ref.return_value = TEST_HOST

def expect_good_sm_types(self):
self.mock_session.xenapi.SM.get_all_records_where.return_value = {
'lvmoiscsi_type_ref': {'type': 'lvmoiscsi'}
}

def test_health_check_no_srs(self):
# Arrange
self.expect_good_sm_types()
self.mock_session.xenapi.SR.get_all_records_where.return_value = {}

# Act
sr_health_check.main()

# Assert
self.mock_session.xenapi.SR.get_all_records_where.assert_called()

def test_health_check_no_local_pbd(self):
# Arrange
self.expect_good_localhost()
self.expect_good_sm_types()
self.expect_good_sr_record()
self.mock_session.xenapi.PBD.get_all_records_where.return_value = {}

# Act
sr_health_check.main()

# Assert
self.mock_session.xenapi.PBD.get_all_records_where.assert_called_with(
f'field "SR" = "iscsi_ref" and field "host" = "{TEST_HOST}"')

def test_health_check_sr_not_plugged(self):
# Arrange
self.expect_good_localhost()
self.expect_good_sm_types()
self.expect_good_sr_record()
self.mock_session.xenapi.PBD.get_all_records_where.return_value = {
'pbd_ref': {'currently_attached': False}
}

# Act
sr_health_check.main()

# Assert
self.mock_session.xenapi.PBD.get_all_records_where.assert_called_with(
f'field "SR" = "iscsi_ref" and field "host" = "{TEST_HOST}"')

def test_health_check_run_sr_check(self):
# Arrange
self.expect_good_localhost()
self.expect_good_sm_types()
self.expect_good_sr_record()
self.mock_session.xenapi.PBD.get_all_records_where.return_value = {
'pbd_ref': {'currently_attached': True}
}
mock_sr = mock.create_autospec(SR)
self.mock_sr.from_uuid.return_value = mock_sr

# Act
sr_health_check.main()

# Assert
mock_sr.check_sr.assert_called_with(SR_UUID)

0 comments on commit 8fea0eb

Please sign in to comment.