mirror of
https://github.com/RichieCahill/dotfiles.git
synced 2026-04-17 04:58:19 -04:00
added system_tests
This commit is contained in:
1
python/system_tests/__init__.py
Normal file
1
python/system_tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""system_tests."""
|
||||
97
python/system_tests/components.py
Normal file
97
python/system_tests/components.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""Validate Jeeves."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from copy import copy
|
||||
from re import search
|
||||
from time import sleep
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from python.common import bash_wrapper
|
||||
from python.zfs import Zpool
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Sequence
|
||||
|
||||
|
||||
def zpool_tests(pool_names: Sequence[str], zpool_capacity_threshold: int = 90) -> list[str] | None:
    """Collect problems found on the given zpools.

    Each pool is reported when its health is anything other than ``ONLINE`` and
    when its capacity is at or above the threshold. Afterwards the output of
    ``zpool upgrade`` is inspected once to detect pools with pending feature
    upgrades.

    Args:
        pool_names (Sequence[str]): Names of the pools to inspect.
        zpool_capacity_threshold (int, optional): Capacity value at or above which
            a pool is reported as low on space. Defaults to 90.

    Returns:
        list[str] | None: One message per problem found; empty when all is well.
    """
    # Text `zpool upgrade` is expected to print when nothing needs upgrading.
    up_to_date_pattern = r"Every feature flags pool has all supported and requested features enabled."

    logging.info("Testing zpool")

    problems: list[str] = []

    for name in pool_names:
        zpool = Zpool(name)

        if zpool.health != "ONLINE":
            problems.append(f"{zpool.name} is {zpool.health}")

        if zpool.capacity >= zpool_capacity_threshold:
            problems.append(f"{zpool.name} is low on space")

    # Only the first element of the helper's result is inspected here.
    upgrade_output, _ = bash_wrapper("zpool upgrade")
    if search(up_to_date_pattern, upgrade_output) is None:
        problems.append("ZPool out of date run `sudo zpool upgrade -a`")

    return problems
|
||||
|
||||
|
||||
def systemd_tests(
|
||||
service_names: Sequence[str],
|
||||
max_retries: int = 30,
|
||||
retry_delay_secs: int = 1,
|
||||
retryable_statuses: Sequence[str] | None = None,
|
||||
valid_statuses: Sequence[str] | None = None,
|
||||
) -> list[str] | None:
|
||||
"""Tests a systemd services.
|
||||
|
||||
Args:
|
||||
service_names (Sequence[str]): A list of service names to test.
|
||||
max_retries (int, optional): The maximum number of retries. Defaults to 30.
|
||||
minimum value is 1.
|
||||
retry_delay_secs (int, optional): The delay between retries in seconds. Defaults to 1.
|
||||
minimum value is 1.
|
||||
retryable_statuses (Sequence[str] | None, optional): A list of retryable statuses. Defaults to None.
|
||||
valid_statuses (Sequence[str] | None, optional): A list of valid statuses. Defaults to None.
|
||||
|
||||
Returns:
|
||||
list[str] | None: A list of errors if any.
|
||||
"""
|
||||
logging.info("Testing systemd service")
|
||||
|
||||
max_retries = max(max_retries, 1)
|
||||
retry_delay_secs = max(retry_delay_secs, 1)
|
||||
last_try = max_retries - 1
|
||||
|
||||
if retryable_statuses is None:
|
||||
retryable_statuses = ("inactive\n", "activating\n")
|
||||
|
||||
if valid_statuses is None:
|
||||
valid_statuses = ("active\n",)
|
||||
|
||||
service_names_set = set(service_names)
|
||||
|
||||
errors: set[str] = set()
|
||||
for retry in range(max_retries):
|
||||
if not service_names_set:
|
||||
break
|
||||
logging.info(f"Testing systemd service in {retry + 1} of {max_retries}")
|
||||
service_names_to_test = copy(service_names_set)
|
||||
for service_name in service_names_to_test:
|
||||
service_status, _ = bash_wrapper(f"systemctl is-active {service_name}")
|
||||
if service_status in valid_statuses:
|
||||
service_names_set.remove(service_name)
|
||||
continue
|
||||
if service_status in retryable_statuses and retry < last_try:
|
||||
continue
|
||||
errors.add(f"{service_name} is {service_status.strip()}")
|
||||
|
||||
sleep(retry_delay_secs)
|
||||
|
||||
return list(errors)
|
||||
64
python/system_tests/validate_system.py
Normal file
64
python/system_tests/validate_system.py
Normal file
@@ -0,0 +1,64 @@
|
||||
"""Validate {server_name}."""
|
||||
|
||||
import logging
|
||||
import sys
|
||||
import tomllib
|
||||
from os import environ
|
||||
from pathlib import Path
|
||||
from socket import gethostname
|
||||
|
||||
import typer
|
||||
|
||||
from python.common import configure_logger, signal_alert
|
||||
from python.system_tests.components import systemd_tests, zpool_tests
|
||||
|
||||
|
||||
def load_config_data(config_file: Path) -> dict[str, list[str]]:
|
||||
"""Load a TOML configuration file.
|
||||
|
||||
Args:
|
||||
config_file (Path): The path to the configuration file.
|
||||
|
||||
Returns:
|
||||
dict: The configuration data.
|
||||
"""
|
||||
return tomllib.loads(config_file.read_text())
|
||||
|
||||
|
||||
def main(config_file: Path) -> None:
|
||||
"""Main."""
|
||||
configure_logger(level=environ.get("LOG_LEVEL", "INFO"))
|
||||
|
||||
server_name = gethostname()
|
||||
logging.info(f"Starting {server_name} validation")
|
||||
|
||||
config_data = load_config_data(config_file)
|
||||
|
||||
errors: list[str] = []
|
||||
try:
|
||||
if config_data.get("zpools") and (zpool_errors := zpool_tests(config_data["zpools"])):
|
||||
errors.extend(zpool_errors)
|
||||
|
||||
if config_data.get("services") and (systemd_errors := systemd_tests(config_data["services"])):
|
||||
errors.extend(systemd_errors)
|
||||
|
||||
except Exception as error:
|
||||
logging.exception(f"{server_name} validation failed")
|
||||
errors.append(f"{server_name} validation failed: {error}")
|
||||
|
||||
if errors:
|
||||
logging.error(f"{server_name} validation failed: \n{'\n'.join(errors)}")
|
||||
signal_alert(f"{server_name} validation failed {errors}")
|
||||
|
||||
sys.exit(1)
|
||||
|
||||
logging.info(f"{server_name} validation passed")
|
||||
|
||||
|
||||
def cli() -> None:
    """Command-line entry point: hand ``main`` to typer to run."""
    typer.run(main)
|
||||
|
||||
|
||||
# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    cli()
|
||||
Reference in New Issue
Block a user