2024-01-25 15:16:27 +00:00
|
|
|
//! System health information and checking
|
|
|
|
|
2024-01-29 10:44:00 +00:00
|
|
|
use std::{sync::Arc, time::Duration};
|
2024-01-25 15:16:27 +00:00
|
|
|
|
2024-01-29 10:44:00 +00:00
|
|
|
use serde::{Deserialize, Serialize};
|
2024-01-25 15:16:27 +00:00
|
|
|
use tokio::sync::watch;
|
|
|
|
|
2024-01-29 10:44:00 +00:00
|
|
|
use crate::messaging::{Client, Message};
|
|
|
|
|
2024-01-25 15:16:27 +00:00
|
|
|
/// CPU usage (as reported by `sysinfo`'s global CPU usage) above which the
/// CPU is flagged as [`Status::Critical`].
const CPU_USAGE_CRITICAL_THRESHOLD: f32 = 90.0;

/// Memory usage, in percent of total memory, above which memory is flagged as
/// [`Status::Critical`].
const MEMORY_USAGE_CRITICAL_THRESHOLD: f64 = 90.0;

/// Disk usage threshold. Not referenced by any code in this file yet: disk
/// checking is currently disabled.
const DISK_USAGE_CRITICAL_THRESHOLD: f32 = 90.0;
|
|
|
|
|
|
|
|
pub struct System {
|
|
|
|
sys: sysinfo::System,
|
|
|
|
disks: sysinfo::Disks,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl System {
|
|
|
|
pub fn new() -> Self {
|
|
|
|
Self {
|
|
|
|
sys: sysinfo::System::new(),
|
|
|
|
disks: sysinfo::Disks::new(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn refresh_resources(&mut self) {
|
2024-01-26 16:22:09 +00:00
|
|
|
use sysinfo::{CpuRefreshKind, MemoryRefreshKind, RefreshKind};
|
2024-01-25 15:16:27 +00:00
|
|
|
|
|
|
|
self.sys.refresh_specifics(
|
2024-01-26 16:22:09 +00:00
|
|
|
RefreshKind::new()
|
2024-01-27 21:01:59 +00:00
|
|
|
.with_memory(MemoryRefreshKind::everything())
|
2024-01-26 16:22:09 +00:00
|
|
|
.with_cpu(CpuRefreshKind::everything()),
|
2024-01-25 15:16:27 +00:00
|
|
|
);
|
|
|
|
|
2024-01-26 16:22:09 +00:00
|
|
|
// self.disks.refresh_list();
|
2024-01-25 15:16:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn system(&self) -> &sysinfo::System {
|
|
|
|
&self.sys
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn disks(&self) -> &sysinfo::Disks {
|
|
|
|
&self.disks
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-01-29 10:44:00 +00:00
|
|
|
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
|
2024-01-25 15:16:27 +00:00
|
|
|
pub enum Status {
|
|
|
|
#[default]
|
|
|
|
Normal,
|
2024-01-26 16:22:09 +00:00
|
|
|
Critical,
|
2024-01-25 15:16:27 +00:00
|
|
|
}
|
|
|
|
|
2024-01-29 10:44:00 +00:00
|
|
|
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
|
2024-01-25 15:16:27 +00:00
|
|
|
pub struct Health {
|
2024-01-26 16:22:09 +00:00
|
|
|
cpu_status: Status,
|
|
|
|
memory_status: Status,
|
|
|
|
disk_status: Status,
|
2024-01-25 15:16:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Clone)]
|
|
|
|
pub struct HealthMonitor(Arc<watch::Sender<Health>>);
|
|
|
|
|
|
|
|
impl HealthMonitor {
|
|
|
|
pub fn new() -> Self {
|
2024-01-26 16:22:09 +00:00
|
|
|
let (sender, _) = watch::channel(Health::default());
|
2024-01-25 15:16:27 +00:00
|
|
|
|
|
|
|
Self(Arc::new(sender))
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn check_system(&self, system: &System) {
|
2024-01-26 16:22:09 +00:00
|
|
|
let sys = system.system();
|
2024-01-25 15:16:27 +00:00
|
|
|
|
2024-01-26 16:22:09 +00:00
|
|
|
let memory_usage = if sys.total_memory() > 0 {
|
|
|
|
sys.used_memory() as f64 * 100.0 / sys.total_memory() as f64
|
2024-01-25 15:16:27 +00:00
|
|
|
} else {
|
|
|
|
0.0
|
|
|
|
};
|
|
|
|
|
2024-01-26 16:22:09 +00:00
|
|
|
let cpu_usage = sys.global_cpu_info().cpu_usage();
|
2024-01-25 15:16:27 +00:00
|
|
|
|
|
|
|
// for d in system.disks().list() {
|
2024-01-26 16:22:09 +00:00
|
|
|
// let _avail = if d.total_space() > 0 {
|
2024-01-25 15:16:27 +00:00
|
|
|
// (d.available_space() * 100 / d.total_space()) as u8
|
|
|
|
// } else {
|
|
|
|
// 0 as u8
|
|
|
|
// };
|
|
|
|
// }
|
|
|
|
|
|
|
|
self.0.send_if_modified(|health| {
|
2024-01-26 16:22:09 +00:00
|
|
|
let cpu_changed = match health.cpu_status {
|
|
|
|
Status::Normal if cpu_usage > CPU_USAGE_CRITICAL_THRESHOLD => {
|
|
|
|
health.cpu_status = Status::Critical;
|
|
|
|
true
|
|
|
|
}
|
|
|
|
Status::Critical if cpu_usage <= CPU_USAGE_CRITICAL_THRESHOLD => {
|
|
|
|
health.cpu_status = Status::Normal;
|
|
|
|
true
|
|
|
|
}
|
|
|
|
_ => false,
|
2024-01-25 15:16:27 +00:00
|
|
|
};
|
|
|
|
|
2024-01-26 16:22:09 +00:00
|
|
|
let memory_changed = match health.memory_status {
|
|
|
|
Status::Normal if memory_usage > MEMORY_USAGE_CRITICAL_THRESHOLD => {
|
|
|
|
health.memory_status = Status::Critical;
|
|
|
|
true
|
|
|
|
}
|
|
|
|
Status::Critical if memory_usage <= MEMORY_USAGE_CRITICAL_THRESHOLD => {
|
|
|
|
health.memory_status = Status::Normal;
|
|
|
|
true
|
|
|
|
}
|
|
|
|
_ => false,
|
2024-01-25 15:16:27 +00:00
|
|
|
};
|
|
|
|
|
2024-01-26 16:22:09 +00:00
|
|
|
cpu_changed || memory_changed
|
2024-01-25 15:16:27 +00:00
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn monitor(&self) -> watch::Receiver<Health> {
|
|
|
|
self.0.subscribe()
|
|
|
|
}
|
|
|
|
}
|
2024-01-26 16:22:09 +00:00
|
|
|
|
|
|
|
impl Default for HealthMonitor {
|
|
|
|
#[inline]
|
|
|
|
fn default() -> Self {
|
|
|
|
Self::new()
|
|
|
|
}
|
|
|
|
}
|
2024-01-29 10:44:00 +00:00
|
|
|
|
|
|
|
pub async fn init_health_subsystem(client: Client) -> HealthMonitor {
|
|
|
|
let health_monitor = HealthMonitor::new();
|
|
|
|
let health_monitor_clone = health_monitor.clone();
|
|
|
|
let health_monitor_ret = health_monitor.clone();
|
|
|
|
let mut system = System::new();
|
|
|
|
|
|
|
|
// Forever refresh system resources and monitor changes
|
|
|
|
std::thread::spawn(move || loop {
|
|
|
|
const REFRESH_INTERVAL: Duration = Duration::from_secs(1);
|
|
|
|
system.refresh_resources();
|
|
|
|
health_monitor.check_system(&system);
|
|
|
|
std::thread::sleep(REFRESH_INTERVAL);
|
|
|
|
});
|
|
|
|
|
|
|
|
tokio::spawn(async move {
|
|
|
|
let mut recv = health_monitor_clone.monitor();
|
|
|
|
|
|
|
|
while let Ok(()) = recv.changed().await {
|
|
|
|
tracing::info!(health = ?&*recv.borrow(), "health watermark");
|
|
|
|
let health = recv.borrow().clone();
|
|
|
|
client.publish(Message::health(health).unwrap()).await;
|
|
|
|
}
|
|
|
|
});
|
|
|
|
|
|
|
|
health_monitor_ret
|
|
|
|
}
|