dotfiles/agent/src/health.rs
Nikos Papadakis 67d147c7e3
uggggh
2024-01-29 12:44:00 +02:00

157 lines
4.2 KiB
Rust

//! System health information and checking
use std::{sync::Arc, time::Duration};
use serde::{Deserialize, Serialize};
use tokio::sync::watch;
use crate::messaging::{Client, Message};
/// Memory usage, as a percentage of total memory, above which memory is
/// reported as `Status::Critical`.
const MEMORY_USAGE_CRITICAL_THRESHOLD: f64 = 90.0;
/// Global CPU usage value above which the CPU is reported as
/// `Status::Critical`. Compared against sysinfo's `cpu_usage()` reading,
/// presumably a percentage like the memory threshold (confirm against sysinfo).
const CPU_USAGE_CRITICAL_THRESHOLD: f32 = 90.0;
/// Threshold intended for disk usage. No active code in this file reads it;
/// the disk check in `HealthMonitor::check_system` is commented out.
const DISK_USAGE_CRITICAL_THRESHOLD: f32 = 90.0;
/// Bundles the `sysinfo` handles that the health checks read from.
pub struct System {
/// Memory and CPU information; updated by `refresh_resources`.
sys: sysinfo::System,
/// Disk list. `refresh_resources` does not refresh it (the call is
/// commented out there).
disks: sysinfo::Disks,
}
impl System {
    /// Creates the wrapper from `sysinfo::System::new()` and `sysinfo::Disks::new()`.
    ///
    /// No refresh is performed here; call [`System::refresh_resources`] before
    /// reading memory or CPU figures.
    pub fn new() -> Self {
        Self {
            sys: sysinfo::System::new(),
            disks: sysinfo::Disks::new(),
        }
    }

    /// Refreshes the memory and CPU information held by the inner
    /// `sysinfo::System`.
    pub fn refresh_resources(&mut self) {
        use sysinfo::{CpuRefreshKind, MemoryRefreshKind, RefreshKind};

        let kind = RefreshKind::new()
            .with_memory(MemoryRefreshKind::everything())
            .with_cpu(CpuRefreshKind::everything());
        self.sys.refresh_specifics(kind);
        // TODO: disks are not refreshed yet; enable `self.disks.refresh_list()`
        // once disk usage is actually evaluated.
    }

    /// Returns the inner `sysinfo::System` (memory and CPU data).
    pub fn system(&self) -> &sysinfo::System {
        &self.sys
    }

    /// Returns the inner `sysinfo::Disks` handle.
    ///
    /// The list is never refreshed by [`System::refresh_resources`].
    pub fn disks(&self) -> &sysinfo::Disks {
        &self.disks
    }
}

/// `Default` is equivalent to [`System::new`].
impl Default for System {
    fn default() -> Self {
        Self::new()
    }
}
/// Coarse health level of a single monitored resource.
///
/// The type is a fieldless enum, so it is `Copy` and `Eq` in addition to the
/// traits needed for (de)serialization.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub enum Status {
    /// The resource has not been flagged; this is the default state.
    #[default]
    Normal,
    /// The resource has been flagged as critical.
    Critical,
}
/// Snapshot of the per-resource health statuses carried by the watch channel
/// inside `HealthMonitor`. All fields default to `Status::Normal`.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct Health {
/// CPU status; updated by `HealthMonitor::check_system`.
cpu_status: Status,
/// Memory status; updated by `HealthMonitor::check_system`.
memory_status: Status,
/// Disk status. `HealthMonitor::check_system` never modifies it, so within
/// this file it keeps its default value.
disk_status: Status,
}
/// Cloneable handle around a shared `watch::Sender<Health>`.
///
/// Clones share the same sender through the `Arc`, so they all update and
/// observe the same `Health` value.
#[derive(Clone)]
pub struct HealthMonitor(Arc<watch::Sender<Health>>);
impl HealthMonitor {
pub fn new() -> Self {
let (sender, _) = watch::channel(Health::default());
Self(Arc::new(sender))
}
pub fn check_system(&self, system: &System) {
let sys = system.system();
let memory_usage = if sys.total_memory() > 0 {
sys.used_memory() as f64 * 100.0 / sys.total_memory() as f64
} else {
0.0
};
let cpu_usage = sys.global_cpu_info().cpu_usage();
// for d in system.disks().list() {
// let _avail = if d.total_space() > 0 {
// (d.available_space() * 100 / d.total_space()) as u8
// } else {
// 0 as u8
// };
// }
self.0.send_if_modified(|health| {
let cpu_changed = match health.cpu_status {
Status::Normal if cpu_usage > CPU_USAGE_CRITICAL_THRESHOLD => {
health.cpu_status = Status::Critical;
true
}
Status::Critical if cpu_usage <= CPU_USAGE_CRITICAL_THRESHOLD => {
health.cpu_status = Status::Normal;
true
}
_ => false,
};
let memory_changed = match health.memory_status {
Status::Normal if memory_usage > MEMORY_USAGE_CRITICAL_THRESHOLD => {
health.memory_status = Status::Critical;
true
}
Status::Critical if memory_usage <= MEMORY_USAGE_CRITICAL_THRESHOLD => {
health.memory_status = Status::Normal;
true
}
_ => false,
};
cpu_changed || memory_changed
});
}
pub fn monitor(&self) -> watch::Receiver<Health> {
self.0.subscribe()
}
}
impl Default for HealthMonitor {
#[inline]
fn default() -> Self {
Self::new()
}
}
/// Starts the health subsystem and returns a handle to it.
///
/// Two background workers are started:
///
/// * an OS thread that refreshes system resources once per second and feeds
///   them to [`HealthMonitor::check_system`];
/// * a Tokio task that logs every health change and publishes it through
///   `client` as a `Message::health` message.
///
/// Both workers are detached; neither handle is kept.
///
/// # Panics
///
/// `tokio::spawn` must be called from within a Tokio runtime. The publishing
/// task itself panics if `Message::health` fails, because its result is
/// unwrapped.
pub async fn init_health_subsystem(client: Client) -> HealthMonitor {
    const REFRESH_INTERVAL: Duration = Duration::from_secs(1);

    let health_monitor = HealthMonitor::new();

    // Subscribe before the sampling thread exists. A receiver treats the value
    // present at subscription time as already seen, so subscribing later could
    // miss a transition made by the thread's first check.
    let mut recv = health_monitor.monitor();

    // Forever refresh system resources and monitor changes.
    let sampler = health_monitor.clone();
    let mut system = System::new();
    std::thread::spawn(move || loop {
        system.refresh_resources();
        sampler.check_system(&system);
        std::thread::sleep(REFRESH_INTERVAL);
    });

    tokio::spawn(async move {
        while recv.changed().await.is_ok() {
            // Take one snapshot and mark it as seen, so the logged and the
            // published state are the same value and a change landing between
            // two reads is not published twice.
            let health = recv.borrow_and_update().clone();
            tracing::info!(health = ?health, "health watermark");
            client.publish(Message::health(health).unwrap()).await;
        }
    });

    health_monitor
}