@@ -138,17 +138,30 @@ def test_linstor_missing(self, linstor_sr, host):
138138# --- Test diskless resources --------------------------------------------------
139139
def _get_diskful_hosts(host, controller_option, volume_name):
    """Return the names of the hosts whose resource for *volume_name* is UpToDate.

    Runs ``linstor <controller_option> resource list`` on *host* over SSH,
    keeps the rows that match both *volume_name* and ``UpToDate`` (via grep),
    and extracts the host column from each remaining row.

    The SSH command is attempted up to 3 times, pausing 5 seconds between
    attempts, whenever it raises ``SSHCommandFailed``.

    :param host: object exposing ``ssh(cmd_list)`` that returns the command output as text.
    :param controller_option: option string passed to ``linstor`` right after the binary name.
    :param volume_name: resource name used to filter the ``resource list`` rows.
    :return: list of host names, one per matching row (empty if the output is empty).
    :raises SSHCommandFailed: if the command still fails on the last attempt.
    """
    # TODO: If any resource is in a temporary creation state or unknown, then need to wait intelligently.
    # NOTE(review): grep exits non-zero when nothing matches; if host.ssh raises on a
    # non-zero exit status, "no UpToDate row" is retried and finally raised rather
    # than returned as an empty list -- confirm this is intended.
    retries = 3
    sleep_sec = 5

    for attempt in range(1, retries + 1):
        try:
            # Find host where volume is UpToDate
            # | {volume_name} | {host} | 7017 | Unused | Ok | UpToDate | 2023-10-24 18:52:05 |
            lines = host.ssh([
                "linstor", controller_option, "resource", "list",
                "|", "grep", volume_name, "|", "grep", "UpToDate"
            ]).splitlines()
        except SSHCommandFailed as e:
            logging.error("SSH Command Failed (attempt %d/%d): %s", attempt, retries, e)
            if attempt >= retries:
                raise
            time.sleep(sleep_sec)
        else:
            # Column 2 of the '|'-separated row is the host name. Collect whole
            # names: `list += str` would add the name one character at a time.
            return [line.split('|')[2].strip() for line in lines]
152165
153166def _ensure_resource_remain_diskless (host , controller_option , volume_name , diskless ):
154167 diskfuls = _get_diskful_hosts (host , controller_option , volume_name )
0 commit comments