|
1 | 1 | import pytest |
2 | 2 |
|
| 3 | +import json |
3 | 4 | import logging |
4 | 5 | import time |
5 | 6 |
|
@@ -52,6 +53,29 @@ def test_create_and_destroy_sr(self, pool_with_linstor, provisioning_type, stora |
52 | 53 | vm.destroy(verify=True) |
53 | 54 | sr.destroy(verify=True) |
54 | 55 |
|
| 56 | + |
def get_drbd_status(host, resource):
    """Return the decoded JSON status DRBD reports for `resource` on `host`.

    Runs `drbdsetup status <resource> --json` through `host.ssh` and parses
    the command output with `json.loads`.
    """
    logging.debug(f"[{host}] Fetching DRBD status for resource `{resource}`...")
    status_command = ["drbdsetup", "status", resource, "--json"]
    raw_status = host.ssh(status_command)
    return json.loads(raw_status)
| 60 | + |
def get_corrupted_resources(host, resource):
    """List the peer devices reported with a positive `out-of-sync` value.

    Walks every resource, connection and peer device in the DRBD status of
    `resource` as seen from `host`.

    Returns a list of `(resource name, connection name, out-of-sync value)`
    tuples, one per peer device whose `out-of-sync` counter is above zero.
    Missing names default to `""` and a missing counter is treated as 0.
    """
    corrupted = []
    for res in get_drbd_status(host, resource):
        for conn in res.get("connections", []):
            for peer in conn.get("peer_devices", []):
                out_of_sync = peer.get("out-of-sync", 0)
                if out_of_sync <= 0:
                    continue
                corrupted.append((res.get("name", ""), conn.get("name", ""), out_of_sync))
    return corrupted
| 73 | + |
def wait_sync(host, resource):
    """Run `drbdadm wait-sync <resource>` on `host` and return once it completes."""
    logging.info(f"[{host}] Waiting for DRBD sync on resource `{resource}`...")
    wait_command = ["drbdadm", "wait-sync", resource]
    host.ssh(wait_command)
| 77 | + |
| 78 | + |
55 | 79 | @pytest.mark.usefixtures("linstor_sr") |
56 | 80 | class TestLinstorSR: |
57 | 81 | @pytest.mark.quicktest |
@@ -88,6 +112,61 @@ def test_snapshot(self, vm_on_linstor_sr): |
88 | 112 | finally: |
89 | 113 | vm.shutdown(verify=True) |
90 | 114 |
|
@pytest.mark.small_vm
def test_resynchronization(self, host_and_corrupted_vdi_on_linstor_sr):
    """Verify that DRBD detects out-of-sync data on a host and repairs it from a peer.

    The fixture is unpacked as `(vm, host, resource_name)`; presumably it has
    already corrupted the replica of `resource_name` held by `host` (confirm
    against the fixture definition).

    Steps:
      1. Elect a pool host whose DRBD peer disk state is `UpToDate`.
      2. Run `drbdadm verify` from that peer (up to 3 attempts) until the
         replica on `host` is reported as out of sync.
      3. Run `drbdadm invalidate-remote` from the peer and wait for resync.
      4. Check nothing is left out of sync, then boot the VM on `host` and
         run the snapshot test on it.
    """
    (vm, host, resource_name) = host_and_corrupted_vdi_on_linstor_sr
    hostname = host.hostname()

    # Connection names whose peer device is reported as UpToDate.
    uptodate_peer_names = [
        conn.get("name", "")
        for res in get_drbd_status(host, resource_name)
        for conn in res.get("connections", [])
        for peer in conn.get("peer_devices", [])
        if peer.get("peer-disk-state", "") == "UpToDate"
    ]
    # A single `next()` with a default is used on purpose: a bare `next()` nested
    # inside a generator expression has its StopIteration turned into RuntimeError
    # (PEP 479), which would bypass the intended "no peer found" failure below.
    other_host = next(
        (
            h
            for peer_name in uptodate_peer_names
            for h in host.pool.hosts
            if h.hostname() == peer_name
        ),
        None,
    )
    if other_host is None:
        pytest.fail("Could not find an UpToDate peer host")
    logging.info(f"Elected `{other_host}` as peer for verification and repair")

    corrupted = None
    max_attempts = 3
    # Attempting several times since testing revealed `drbdadm verify` can be flaky
    for attempt in range(1, max_attempts + 1):
        logging.info(f"`drbdadm verify` attempt {attempt}/{max_attempts}")
        logging.info(f"[{other_host}] Running DRBD verify for `{resource_name}`...")
        other_host.ssh(["drbdadm", "verify", f"{resource_name}:{hostname}/0"])
        wait_sync(other_host, resource_name)

        corrupted_resources = get_corrupted_resources(other_host, resource_name)
        if not corrupted_resources:
            logging.warning(f"No corrupted resources found on attempt #{attempt}")
            continue
        # Only an out-of-sync entry for our resource on the tested host counts.
        for res_name, peer_name, out_of_sync in corrupted_resources:
            if res_name == resource_name and peer_name == hostname:
                corrupted = (res_name, peer_name, out_of_sync)
        if corrupted:
            break
    if not corrupted:
        pytest.fail(f"Failed to identify corrupted resource after {max_attempts} attempts")

    logging.info(f"Invalidating remote resource `{resource_name}`...")
    other_host.ssh([
        "drbdadm", "invalidate-remote",
        f"{resource_name}:{hostname}/0",
        "--reset-bitmap=no"
    ])
    wait_sync(other_host, resource_name)
    if get_corrupted_resources(other_host, resource_name):
        pytest.fail("Corrupted resource did not get fixed")

    vm.start(on=host.uuid)
    try:
        vm.wait_for_os_booted()
        vm.test_snapshot_on_running_vm()
    finally:
        # Always stop the VM, even if the boot or snapshot checks fail.
        vm.shutdown(verify=True)
| 169 | + |
91 | 170 | # *** tests with reboots (longer tests). |
92 | 171 |
|
93 | 172 | @pytest.mark.reboot |
|
0 commit comments