dotfiles/agent/src/health.rs

//! System health information and checking

use std::{sync::Arc, time::Duration};

use serde::{Deserialize, Serialize};
use tokio::sync::watch;

use crate::messaging::{Client, Message};

/// Memory usage, as a percentage of total memory, above which memory is reported as critical.
const MEMORY_USAGE_CRITICAL_THRESHOLD: f64 = 90.0;
/// CPU usage reading above which the CPU is reported as critical.
/// NOTE(review): presumably a percentage, matching what `sysinfo` reports — confirm.
const CPU_USAGE_CRITICAL_THRESHOLD: f32 = 90.0;
/// Intended threshold for disk usage; no code visible in this file references it yet.
const DISK_USAGE_CRITICAL_THRESHOLD: f32 = 90.0;

/// Bundles the `sysinfo` handles this module reads resource usage from.
pub struct System {
    /// Source of the memory and CPU readings; updated by `refresh_resources`.
    sys: sysinfo::System,
    /// Disk handle exposed through `disks()`; nothing in this file refreshes it yet.
    disks: sysinfo::Disks,
}

impl System {
    /// Creates the wrapper around freshly constructed `sysinfo` handles.
    ///
    /// Nothing is refreshed here; call [`System::refresh_resources`] before
    /// reading memory or CPU values.
    pub fn new() -> Self {
        Self {
            sys: sysinfo::System::new(),
            disks: sysinfo::Disks::new(),
        }
    }

    /// Refreshes the memory and CPU information held by the inner `sysinfo::System`.
    pub fn refresh_resources(&mut self) {
        use sysinfo::{CpuRefreshKind, MemoryRefreshKind, RefreshKind};

        self.sys.refresh_specifics(
            RefreshKind::new()
                .with_memory(MemoryRefreshKind::everything())
                .with_cpu(CpuRefreshKind::everything()),
        );

        // TODO: disks are not refreshed yet (`self.disks.refresh_list()` is still
        // disabled), so `disks()` only reflects what `Disks::new()` produced.
    }

    /// Returns the inner `sysinfo::System` holding the memory and CPU readings.
    pub fn system(&self) -> &sysinfo::System {
        &self.sys
    }

    /// Returns the inner `sysinfo::Disks` handle.
    pub fn disks(&self) -> &sysinfo::Disks {
        &self.disks
    }
}

/// `Default` mirrors [`System::new`], matching the `Default` impl on `HealthMonitor`.
impl Default for System {
    #[inline]
    fn default() -> Self {
        Self::new()
    }
}

/// Coarse health level of a single monitored resource.
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
pub enum Status {
    /// Default level; used while the reading is at or below its critical threshold.
    #[default]
    Normal,
    /// The reading exceeded its critical threshold.
    Critical,
}

/// Snapshot of the per-resource health levels; every field defaults to `Status::Normal`.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct Health {
    /// Level derived from the global CPU usage reading.
    cpu_status: Status,
    /// Level derived from used memory relative to total memory.
    memory_status: Status,
    /// Level for disk usage; no code visible in this file updates it yet.
    disk_status: Status,
}

/// Cloneable handle to a shared `watch::Sender<Health>`; clones share the same channel
/// through the `Arc`.
#[derive(Clone)]
pub struct HealthMonitor(Arc<watch::Sender<Health>>);

impl HealthMonitor {
    pub fn new() -> Self {
        let (sender, _) = watch::channel(Health::default());

        Self(Arc::new(sender))
    }

    pub fn check_system(&self, system: &System) {
        let sys = system.system();

        let memory_usage = if sys.total_memory() > 0 {
            sys.used_memory() as f64 * 100.0 / sys.total_memory() as f64
        } else {
            0.0
        };

        let cpu_usage = sys.global_cpu_info().cpu_usage();

        // for d in system.disks().list() {
        //     let _avail = if d.total_space() > 0 {
        //         (d.available_space() * 100 / d.total_space()) as u8
        //     } else {
        //         0 as u8
        //     };
        // }

        self.0.send_if_modified(|health| {
            let cpu_changed = match health.cpu_status {
                Status::Normal if cpu_usage > CPU_USAGE_CRITICAL_THRESHOLD => {
                    health.cpu_status = Status::Critical;
                    true
                }
                Status::Critical if cpu_usage <= CPU_USAGE_CRITICAL_THRESHOLD => {
                    health.cpu_status = Status::Normal;
                    true
                }
                _ => false,
            };

            let memory_changed = match health.memory_status {
                Status::Normal if memory_usage > MEMORY_USAGE_CRITICAL_THRESHOLD => {
                    health.memory_status = Status::Critical;
                    true
                }
                Status::Critical if memory_usage <= MEMORY_USAGE_CRITICAL_THRESHOLD => {
                    health.memory_status = Status::Normal;
                    true
                }
                _ => false,
            };

            cpu_changed || memory_changed
        });
    }

    pub fn monitor(&self) -> watch::Receiver<Health> {
        self.0.subscribe()
    }
}

/// `Default` delegates to [`HealthMonitor::new`].
impl Default for HealthMonitor {
    /// Equivalent to calling `HealthMonitor::new()`.
    #[inline]
    fn default() -> Self {
        Self::new()
    }
}

/// Starts the health subsystem and returns a handle to its [`HealthMonitor`].
///
/// Two background workers are started:
/// * an OS thread that, once per second, refreshes system resources and
///   re-evaluates the health status;
/// * a Tokio task that logs every health change and publishes it through `client`.
///
/// The returned monitor shares its channel with both workers, so callers can
/// subscribe to the same updates via [`HealthMonitor::monitor`].
///
/// Calls `tokio::spawn`, so it has to run inside a Tokio runtime.
pub async fn init_health_subsystem(client: Client) -> HealthMonitor {
    const REFRESH_INTERVAL: Duration = Duration::from_secs(1);

    let health_monitor = HealthMonitor::new();

    // Subscribe before the sampling thread starts: a receiver created after the
    // first check would treat a transition made by that check as already seen
    // and never publish it.
    let mut updates = health_monitor.monitor();

    let sampler = health_monitor.clone();
    let mut system = System::new();

    // Forever refresh system resources and monitor changes
    std::thread::spawn(move || loop {
        system.refresh_resources();
        sampler.check_system(&system);
        std::thread::sleep(REFRESH_INTERVAL);
    });

    tokio::spawn(async move {
        while updates.changed().await.is_ok() {
            // Take one snapshot and mark it as seen, so the logged and published
            // values are the same and a value is not reported twice.
            let health = updates.borrow_and_update().clone();
            tracing::info!(health = ?health, "health watermark");
            // NOTE(review): a failure in `Message::health` panics and ends this
            // publishing task; consider handling it once its error type is confirmed.
            client.publish(Message::health(health).unwrap()).await;
        }
    });

    health_monitor
}
health draft 2024-01-25 15:16:27 +00:00			`//! System health information and checking`

uggggh 2024-01-29 10:44:00 +00:00			`use std::{sync::Arc, time::Duration};`
health draft 2024-01-25 15:16:27 +00:00
uggggh 2024-01-29 10:44:00 +00:00			`use serde::{Deserialize, Serialize};`
health draft 2024-01-25 15:16:27 +00:00			`use tokio::sync::watch;`

uggggh 2024-01-29 10:44:00 +00:00			`use crate::messaging::{Client, Message};`

health draft 2024-01-25 15:16:27 +00:00			`const MEMORY_USAGE_CRITICAL_THRESHOLD: f64 = 90.0;`
			`const CPU_USAGE_CRITICAL_THRESHOLD: f32 = 90.0;`
			`const DISK_USAGE_CRITICAL_THRESHOLD: f32 = 90.0;`

			`pub struct System {`
			`sys: sysinfo::System,`
			`disks: sysinfo::Disks,`
			`}`

			`impl System {`
			`pub fn new() -> Self {`
			`Self {`
			`sys: sysinfo::System::new(),`
			`disks: sysinfo::Disks::new(),`
			`}`
			`}`

			`pub fn refresh_resources(&mut self) {`
messaging stuff 2024-01-26 16:22:09 +00:00			`use sysinfo::{CpuRefreshKind, MemoryRefreshKind, RefreshKind};`
health draft 2024-01-25 15:16:27 +00:00
			`self.sys.refresh_specifics(`
messaging stuff 2024-01-26 16:22:09 +00:00			`RefreshKind::new()`
todo: services 2024-01-27 21:01:59 +00:00			`.with_memory(MemoryRefreshKind::everything())`
messaging stuff 2024-01-26 16:22:09 +00:00			`.with_cpu(CpuRefreshKind::everything()),`
health draft 2024-01-25 15:16:27 +00:00			`);`

messaging stuff 2024-01-26 16:22:09 +00:00			`// self.disks.refresh_list();`
health draft 2024-01-25 15:16:27 +00:00			`}`

			`pub fn system(&self) -> &sysinfo::System {`
			`&self.sys`
			`}`

			`pub fn disks(&self) -> &sysinfo::Disks {`
			`&self.disks`
			`}`
			`}`

uggggh 2024-01-29 10:44:00 +00:00			`#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]`
health draft 2024-01-25 15:16:27 +00:00			`pub enum Status {`
			`#[default]`
			`Normal,`
messaging stuff 2024-01-26 16:22:09 +00:00			`Critical,`
health draft 2024-01-25 15:16:27 +00:00			`}`

uggggh 2024-01-29 10:44:00 +00:00			`#[derive(Clone, Debug, Default, Serialize, Deserialize)]`
health draft 2024-01-25 15:16:27 +00:00			`pub struct Health {`
messaging stuff 2024-01-26 16:22:09 +00:00			`cpu_status: Status,`
			`memory_status: Status,`
			`disk_status: Status,`
health draft 2024-01-25 15:16:27 +00:00			`}`

			`#[derive(Clone)]`
			`pub struct HealthMonitor(Arc<watch::Sender<Health>>);`

			`impl HealthMonitor {`
			`pub fn new() -> Self {`
messaging stuff 2024-01-26 16:22:09 +00:00			`let (sender, _) = watch::channel(Health::default());`
health draft 2024-01-25 15:16:27 +00:00
			`Self(Arc::new(sender))`
			`}`

			`pub fn check_system(&self, system: &System) {`
messaging stuff 2024-01-26 16:22:09 +00:00			`let sys = system.system();`
health draft 2024-01-25 15:16:27 +00:00
messaging stuff 2024-01-26 16:22:09 +00:00			`let memory_usage = if sys.total_memory() > 0 {`
			`sys.used_memory() as f64 * 100.0 / sys.total_memory() as f64`
health draft 2024-01-25 15:16:27 +00:00			`} else {`
			`0.0`
			`};`

messaging stuff 2024-01-26 16:22:09 +00:00			`let cpu_usage = sys.global_cpu_info().cpu_usage();`
health draft 2024-01-25 15:16:27 +00:00
			`// for d in system.disks().list() {`
messaging stuff 2024-01-26 16:22:09 +00:00			`// let _avail = if d.total_space() > 0 {`
health draft 2024-01-25 15:16:27 +00:00			`// (d.available_space() * 100 / d.total_space()) as u8`
			`// } else {`
			`// 0 as u8`
			`// };`
			`// }`

			`self.0.send_if_modified(\|health\| {`
messaging stuff 2024-01-26 16:22:09 +00:00			`let cpu_changed = match health.cpu_status {`
			`Status::Normal if cpu_usage > CPU_USAGE_CRITICAL_THRESHOLD => {`
			`health.cpu_status = Status::Critical;`
			`true`
			`}`
			`Status::Critical if cpu_usage <= CPU_USAGE_CRITICAL_THRESHOLD => {`
			`health.cpu_status = Status::Normal;`
			`true`
			`}`
			`_ => false,`
health draft 2024-01-25 15:16:27 +00:00			`};`

messaging stuff 2024-01-26 16:22:09 +00:00			`let memory_changed = match health.memory_status {`
			`Status::Normal if memory_usage > MEMORY_USAGE_CRITICAL_THRESHOLD => {`
			`health.memory_status = Status::Critical;`
			`true`
			`}`
			`Status::Critical if memory_usage <= MEMORY_USAGE_CRITICAL_THRESHOLD => {`
			`health.memory_status = Status::Normal;`
			`true`
			`}`
			`_ => false,`
health draft 2024-01-25 15:16:27 +00:00			`};`

messaging stuff 2024-01-26 16:22:09 +00:00			`cpu_changed \|\| memory_changed`
health draft 2024-01-25 15:16:27 +00:00			`});`
			`}`

			`pub fn monitor(&self) -> watch::Receiver<Health> {`
			`self.0.subscribe()`
			`}`
			`}`
messaging stuff 2024-01-26 16:22:09 +00:00
			`impl Default for HealthMonitor {`
			`#[inline]`
			`fn default() -> Self {`
			`Self::new()`
			`}`
			`}`
uggggh 2024-01-29 10:44:00 +00:00
			`pub async fn init_health_subsystem(client: Client) -> HealthMonitor {`
			`let health_monitor = HealthMonitor::new();`
			`let health_monitor_clone = health_monitor.clone();`
			`let health_monitor_ret = health_monitor.clone();`
			`let mut system = System::new();`

			`// Forever refresh system resources and monitor changes`
			`std::thread::spawn(move \|\| loop {`
			`const REFRESH_INTERVAL: Duration = Duration::from_secs(1);`
			`system.refresh_resources();`
			`health_monitor.check_system(&system);`
			`std::thread::sleep(REFRESH_INTERVAL);`
			`});`

			`tokio::spawn(async move {`
			`let mut recv = health_monitor_clone.monitor();`

			`while let Ok(()) = recv.changed().await {`
			`tracing::info!(health = ?&*recv.borrow(), "health watermark");`
			`let health = recv.borrow().clone();`
			`client.publish(Message::health(health).unwrap()).await;`
			`}`
			`});`

			`health_monitor_ret`
			`}`