Skip to content

Commit

Permalink
Add sleep after disk deletion (#691)
Browse files Browse the repository at this point in the history
  • Loading branch information
ebattat authored Nov 6, 2023
1 parent e3acceb commit 56f48b6
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 30 deletions.
Original file line number Diff line number Diff line change
@@ -1,33 +1,25 @@

class OCError(Exception):
""" Base class for all OC error classes.
All exceptions raised by the benchmark runner library should inherit from this class. """
class OCPResourceError(Exception):
""" Base class for all OCP resource error classes. """
pass


class OCPResourceNotCreateTimeout(OCError):
"""This exception return resource create timeout error"""
class OCPResourceCreationTimeout(OCPResourceError):
"""This exception returns resource creation timeout error"""
def __init__(self, resource):
self.message = f'The {resource} resource was not created'
super(OCPResourceNotCreateTimeout, self).__init__(self.message)
super(OCPResourceCreationTimeout, self).__init__(self.message)


class KataInstallationFailed(OCError):
class ODFInstallationFailed(OCPResourceError):
    """Raised when the ODF installation reports an incorrect Ceph disk number.

    :param disk_num: the disk number that was observed; it is embedded
        verbatim in the error message.
    """

    def __init__(self, disk_num):
        # Keep the text on the instance so callers can read `.message` directly.
        text = f'Incorrect ODF Ceph disk number: {disk_num}'
        self.message = text
        super().__init__(text)


class KataInstallationFailed(OCPResourceError):
    """Raised when the installation of sandboxed containers fails.

    :param reason: description of the failure; it is appended to the
        error message.
    """

    def __init__(self, reason):
        # Keep the text on the instance so callers can read `.message` directly.
        text = f'Installation of sandboxed containers failed: {reason}'
        self.message = text
        super().__init__(text)


class ExecFailed(OCError):
    """Raised when executing a command on a pod fails.

    :param pod: the pod the command was executed on
    :param command: the command that was executed
    :param reason: description of the failure
    """

    def __init__(self, pod, command, reason):
        # Keep the text on the instance so callers can read `.message` directly.
        text = f'exec {command} on {pod} failed: {reason}'
        self.message = text
        super().__init__(text)


class PodFailed(OCError):
    """Raised when a pod itself has failed.

    The previous docstring ("exec command on pod failed") was copied from
    ExecFailed and did not describe this exception.

    :param pod: the pod that failed; it is embedded in the error message.
    """

    def __init__(self, pod):
        # Keep the text on the instance so callers can read `.message` directly.
        self.message = f'pod {pod} failed'
        super(PodFailed, self).__init__(self.message)
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
from typeguard import typechecked

from benchmark_runner.common.oc.oc import OC
from benchmark_runner.common.logger.logger_time_stamp import logger_time_stamp, logger
from benchmark_runner.common.logger.logger_time_stamp import logger_time_stamp
from benchmark_runner.main.environment_variables import environment_variables
from benchmark_runner.common.ocp_resources.create_ocp_resource_exceptions import OCPResourceNotCreateTimeout
from benchmark_runner.common.ocp_resources.create_ocp_resource_exceptions import OCPResourceCreationTimeout, ODFInstallationFailed


class CreateOCPResourceOperations:
"""
This class is created OCP resources
This class creates OCP resources
"""
def __init__(self, oc: OC):
self._environment_variables_dict = environment_variables.environment_variables_dict
Expand Down Expand Up @@ -39,7 +39,7 @@ def _replace_in_file(file_path: str, old_value: str, new_value: str):

@typechecked
@logger_time_stamp
def wait_for_ocp_resource_create(self, resource: str, verify_cmd: str, status: str = '', count_disk_maker: bool = False, count_openshift_storage: bool = False, kata_worker_machine_count: bool = False, timeout: int = int(environment_variables.environment_variables_dict['timeout'])):
def wait_for_ocp_resource_create(self, resource: str, verify_cmd: str, status: str = '', count_disk_maker: bool = False, count_openshift_storage: bool = False, kata_worker_machine_count: bool = False, verify_installation: bool = False, timeout: int = int(environment_variables.environment_variables_dict['timeout'])):
"""
This method waits until the resource is created, or raises an exception after the timeout
:param resource: The resource cnv, local storage, odf, kata
Expand All @@ -48,6 +48,8 @@ def wait_for_ocp_resource_create(self, resource: str, verify_cmd: str, status: s
:param count_disk_maker: count disk maker
:param count_openshift_storage: count openshift storage disks
:param kata_worker_machine_count: count kata worker machine
:param verify_installation: Verify that the installation was successful
:param timeout: Timeout duration for OpenShift resource creation.
:return: True if met the result
"""
current_wait_time = 0
Expand All @@ -56,6 +58,10 @@ def wait_for_ocp_resource_create(self, resource: str, verify_cmd: str, status: s
if count_openshift_storage:
if int(self.__oc.run(verify_cmd)) == self.__oc.get_num_active_nodes() * int(environment_variables.environment_variables_dict['num_odf_disk']):
return True
else:
# When verifying the ODF installation, all Ceph disks must be operational; if they are not, raise an exception.
if verify_installation:
raise ODFInstallationFailed(disk_num=self.__oc.run(verify_cmd))
# Count disk maker (worker/master number * disk maker)
elif count_disk_maker:
if int(self.__oc.run(verify_cmd)) == int(self.__oc.get_num_active_nodes()) * 2:
Expand All @@ -75,7 +81,7 @@ def wait_for_ocp_resource_create(self, resource: str, verify_cmd: str, status: s
# sleep for x seconds
time.sleep(OC.SLEEP_TIME)
current_wait_time += OC.SLEEP_TIME
raise OCPResourceNotCreateTimeout(resource)
raise OCPResourceCreationTimeout(resource)

def apply_non_approved_patch(self, approved_values_list: list, namespace: str, resource: str):
"""
Expand Down
11 changes: 7 additions & 4 deletions benchmark_runner/common/ocp_resources/create_odf.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def create_odf(self):
for resource in self.__resource_list:
logger.info(f'run {resource}')
if resource.endswith('.sh'):
# disk cleanup - reference: https://rook.io/docs/rook/v1.12/Getting-Started/ceph-teardown/#delete-the-data-on-hosts
# Ceph disk deletion - reference: https://rook.io/docs/rook/v1.12/Getting-Started/ceph-teardown/#delete-the-data-on-hosts
if '01_delete_disks.sh' == resource:
delete_node_disk = ''
result_dict = {}
Expand All @@ -39,6 +39,9 @@ def create_odf(self):
result_dict[node] = delete_node_disk
delete_node_disk = ''
self.__oc.run(cmd=f'chmod +x {os.path.join(self.__path, resource)}; {self.__path}/./{resource} "{list(result_dict.keys())[0]}" "{list(result_dict.values())[0]}" "{list(result_dict.keys())[1]}" "{list(result_dict.values())[1]}" "{list(result_dict.keys())[2]}" "{list(result_dict.values())[2]}"')
# Sleep after Ceph disk deletion to avoid installation failure
logger.info(f"sleep {self._environment_variables_dict.get('bulk_sleep_time', '')} seconds")
time.sleep(int(self._environment_variables_dict.get('bulk_sleep_time', '')))
else:
self.__oc.run(cmd=f'chmod +x {os.path.join(self.__path, resource)}; {self.__path}/./{resource}')
else: # yaml
Expand Down Expand Up @@ -71,7 +74,7 @@ def create_odf(self):
self.wait_for_ocp_resource_create(resource='odf',
verify_cmd='oc get pod -n openshift-storage | grep osd | grep -v prepare | wc -l',
count_openshift_storage=True)
# sleep between each resource run for avoiding installation failure
logger.info(f"sleep {self._environment_variables_dict.get('bulk_sleep_time', '')} seconds")
time.sleep(int(self._environment_variables_dict.get('bulk_sleep_time', '')))
self.wait_for_ocp_resource_create(resource='odf',
verify_cmd='oc get pod -n openshift-storage | grep osd | grep -v prepare | wc -l',
count_openshift_storage=True, verify_installation=True)
return True

0 comments on commit 56f48b6

Please sign in to comment.