From e12a50f938585a92bf5482fb5b1cc6aeb8d93186 Mon Sep 17 00:00:00 2001 From: Dorian Date: Wed, 11 Mar 2026 00:22:57 +0000 Subject: [PATCH] feat: add system monitoring RPC endpoints (system.stats, system.processes, system.temperature) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Read real metrics from /proc/stat, /proc/meminfo, /proc/uptime, /proc/loadavg, df, ps, and /sys/class/thermal/. CPU usage computed via dual-sample jiffies. Deployed and verified on live server — all three endpoints return real data. Co-Authored-By: Claude Opus 4.6 --- core/archipelago/src/api/rpc/mod.rs | 6 + core/archipelago/src/api/rpc/system.rs | 268 +++++++++++++++++++++++++ loop/plan.md | 2 +- 3 files changed, 275 insertions(+), 1 deletion(-) create mode 100644 core/archipelago/src/api/rpc/system.rs diff --git a/core/archipelago/src/api/rpc/mod.rs b/core/archipelago/src/api/rpc/mod.rs index ac9edc41..3abe79d3 100644 --- a/core/archipelago/src/api/rpc/mod.rs +++ b/core/archipelago/src/api/rpc/mod.rs @@ -15,6 +15,7 @@ mod peers; mod router; mod tor; mod totp; +mod system; mod update; mod wallet; @@ -319,6 +320,11 @@ impl RpcHandler { "dwn.status" => self.handle_dwn_status().await, "dwn.sync" => self.handle_dwn_sync().await, + // System monitoring + "system.stats" => self.handle_system_stats().await, + "system.processes" => self.handle_system_processes().await, + "system.temperature" => self.handle_system_temperature().await, + // System updates "update.check" => self.handle_update_check().await, "update.status" => self.handle_update_status().await, diff --git a/core/archipelago/src/api/rpc/system.rs b/core/archipelago/src/api/rpc/system.rs new file mode 100644 index 00000000..efbfd1c5 --- /dev/null +++ b/core/archipelago/src/api/rpc/system.rs @@ -0,0 +1,268 @@ +use super::RpcHandler; +use anyhow::{Context, Result}; +use tracing::debug; + +impl RpcHandler { + /// system.stats — CPU usage, RAM used/total, disk used/total, uptime, load average + pub(super) async fn handle_system_stats(&self) -> Result { + debug!("Getting system stats"); + + let uptime = read_uptime().await.unwrap_or(0.0); + let load = read_loadavg().await.unwrap_or((0.0, 0.0, 0.0)); + let cpu = read_cpu_usage().await.unwrap_or(0.0); + let (mem_used, mem_total) = read_meminfo().await.unwrap_or((0, 0)); + let (disk_used, disk_total) = read_disk_usage().await.unwrap_or((0, 0)); + + Ok(serde_json::json!({ + "uptime_secs": uptime as u64, + "load_avg_1": load.0, + "load_avg_5": load.1, + "load_avg_15": load.2, + "cpu_usage_percent": cpu, + "mem_used_bytes": mem_used, + "mem_total_bytes": mem_total, + "disk_used_bytes": disk_used, + "disk_total_bytes": disk_total, + })) + } + + /// system.processes — top 10 processes by CPU + pub(super) async fn handle_system_processes(&self) -> Result { + debug!("Getting top processes"); + + let procs = read_top_processes().await.unwrap_or_default(); + + Ok(serde_json::json!({ "processes": procs })) + } + + /// system.temperature — thermal zone readings + pub(super) async fn handle_system_temperature(&self) -> Result { + debug!("Getting system temperature"); + + let temps = read_temperatures().await.unwrap_or_default(); + + Ok(serde_json::json!({ "temperatures": temps })) + } +} + +/// Read system uptime from /proc/uptime (seconds since boot). +async fn read_uptime() -> Result { + let content = tokio::fs::read_to_string("/proc/uptime") + .await + .context("Failed to read /proc/uptime")?; + let uptime: f64 = content + .split_whitespace() + .next() + .ok_or_else(|| anyhow::anyhow!("Empty /proc/uptime"))? + .parse() + .context("Failed to parse uptime")?; + Ok(uptime) +} + +/// Read load averages from /proc/loadavg. +async fn read_loadavg() -> Result<(f64, f64, f64)> { + let content = tokio::fs::read_to_string("/proc/loadavg") + .await + .context("Failed to read /proc/loadavg")?; + let mut parts = content.split_whitespace(); + let l1: f64 = parts + .next() + .ok_or_else(|| anyhow::anyhow!("Missing load1"))? + .parse() + .context("parse load1")?; + let l5: f64 = parts + .next() + .ok_or_else(|| anyhow::anyhow!("Missing load5"))? + .parse() + .context("parse load5")?; + let l15: f64 = parts + .next() + .ok_or_else(|| anyhow::anyhow!("Missing load15"))? + .parse() + .context("parse load15")?; + Ok((l1, l5, l15)) +} + +/// Compute CPU usage by sampling /proc/stat twice with a 250ms gap. +async fn read_cpu_usage() -> Result { + let snap1 = read_cpu_jiffies().await?; + tokio::time::sleep(std::time::Duration::from_millis(250)).await; + let snap2 = read_cpu_jiffies().await?; + + let total_delta = snap2.total.saturating_sub(snap1.total); + let idle_delta = snap2.idle.saturating_sub(snap1.idle); + + if total_delta == 0 { + return Ok(0.0); + } + + let usage = 100.0 * (1.0 - (idle_delta as f64 / total_delta as f64)); + Ok((usage * 10.0).round() / 10.0) // one decimal +} + +struct CpuJiffies { + total: u64, + idle: u64, +} + +async fn read_cpu_jiffies() -> Result { + let content = tokio::fs::read_to_string("/proc/stat") + .await + .context("Failed to read /proc/stat")?; + let cpu_line = content + .lines() + .next() + .ok_or_else(|| anyhow::anyhow!("Empty /proc/stat"))?; + // cpu user nice system idle iowait irq softirq steal guest guest_nice + let vals: Vec = cpu_line + .split_whitespace() + .skip(1) // skip "cpu" + .filter_map(|v| v.parse().ok()) + .collect(); + if vals.len() < 4 { + anyhow::bail!("Not enough fields in /proc/stat cpu line"); + } + let idle = vals[3]; // idle column + let total: u64 = vals.iter().sum(); + Ok(CpuJiffies { total, idle }) +} + +/// Read memory info from /proc/meminfo. +/// Returns (used_bytes, total_bytes). +async fn read_meminfo() -> Result<(u64, u64)> { + let content = tokio::fs::read_to_string("/proc/meminfo") + .await + .context("Failed to read /proc/meminfo")?; + + let mut total_kb: u64 = 0; + let mut available_kb: u64 = 0; + + for line in content.lines() { + if let Some(val) = line.strip_prefix("MemTotal:") { + total_kb = parse_meminfo_kb(val)?; + } else if let Some(val) = line.strip_prefix("MemAvailable:") { + available_kb = parse_meminfo_kb(val)?; + } + } + + let used_bytes = total_kb.saturating_sub(available_kb) * 1024; + let total_bytes = total_kb * 1024; + Ok((used_bytes, total_bytes)) +} + +fn parse_meminfo_kb(val: &str) -> Result { + val.trim() + .trim_end_matches("kB") + .trim() + .parse::() + .context("parse meminfo value") +} + +/// Read disk usage via `df` for the root filesystem. +/// Returns (used_bytes, total_bytes). +async fn read_disk_usage() -> Result<(u64, u64)> { + let output = tokio::process::Command::new("df") + .args(["--block-size=1", "--output=used,size", "/"]) + .output() + .await + .context("Failed to run df")?; + + if !output.status.success() { + anyhow::bail!("df failed: {}", String::from_utf8_lossy(&output.stderr)); + } + + let stdout = String::from_utf8(output.stdout).context("df output not utf8")?; + // Skip header line + let data_line = stdout + .lines() + .nth(1) + .ok_or_else(|| anyhow::anyhow!("No data line from df"))?; + let mut parts = data_line.split_whitespace(); + let used: u64 = parts + .next() + .ok_or_else(|| anyhow::anyhow!("Missing used"))? + .parse() + .context("parse df used")?; + let total: u64 = parts + .next() + .ok_or_else(|| anyhow::anyhow!("Missing total"))? + .parse() + .context("parse df total")?; + + Ok((used, total)) +} + +/// Read top 10 processes by CPU from `ps`. +async fn read_top_processes() -> Result> { + let output = tokio::process::Command::new("ps") + .args(["--no-headers", "-eo", "pid,%cpu,%mem,comm", "--sort=-%cpu"]) + .output() + .await + .context("Failed to run ps")?; + + if !output.status.success() { + anyhow::bail!("ps failed: {}", String::from_utf8_lossy(&output.stderr)); + } + + let stdout = String::from_utf8(output.stdout).context("ps output not utf8")?; + let procs: Vec = stdout + .lines() + .take(10) + .filter_map(|line| { + let mut parts = line.split_whitespace(); + let pid = parts.next()?.parse::().ok()?; + let cpu: f64 = parts.next()?.parse().ok()?; + let mem: f64 = parts.next()?.parse().ok()?; + let name = parts.collect::>().join(" "); + Some(serde_json::json!({ + "pid": pid, + "cpu_percent": cpu, + "mem_percent": mem, + "name": name, + })) + }) + .collect(); + + Ok(procs) +} + +/// Read temperatures from /sys/class/thermal/thermal_zone*/temp. +async fn read_temperatures() -> Result> { + let mut temps = Vec::new(); + let thermal_dir = std::path::Path::new("/sys/class/thermal"); + if !thermal_dir.exists() { + return Ok(temps); + } + + let mut entries = tokio::fs::read_dir(thermal_dir) + .await + .context("Failed to read /sys/class/thermal")?; + + while let Some(entry) = entries.next_entry().await? { + let name = entry.file_name(); + let name_str = name.to_string_lossy(); + if !name_str.starts_with("thermal_zone") { + continue; + } + + let temp_path = entry.path().join("temp"); + let type_path = entry.path().join("type"); + + let millideg = match tokio::fs::read_to_string(&temp_path).await { + Ok(s) => s.trim().parse::().unwrap_or(0), + Err(_) => continue, + }; + + let zone_type = tokio::fs::read_to_string(&type_path) + .await + .map(|s| s.trim().to_string()) + .unwrap_or_else(|_| name_str.to_string()); + + temps.push(serde_json::json!({ + "zone": zone_type, + "temp_celsius": millideg as f64 / 1000.0, + })); + } + + Ok(temps) +} diff --git a/loop/plan.md b/loop/plan.md index 99ea725b..a4a60496 100644 --- a/loop/plan.md +++ b/loop/plan.md @@ -48,7 +48,7 @@ #### Sprint 3: Backend Robustness (Week 5-6) -- [ ] **BACK-01** — Add system monitoring RPC endpoints. Create `core/archipelago/src/api/rpc/system.rs` with handlers for: `system.stats` (CPU usage, RAM used/total, disk used/total, uptime, load average), `system.processes` (top 10 by CPU), `system.temperature` (if available). Read from `/proc/stat`, `/proc/meminfo`, `/proc/uptime`, `df`, and `/sys/class/thermal/` on Linux. Register in `core/archipelago/src/api/rpc/mod.rs` route table. **Acceptance**: `curl -X POST http://localhost:5678/rpc/v1 -d '{"method":"system.stats"}'` returns real metrics on dev server. +- [x] **BACK-01** — Add system monitoring RPC endpoints. Create `core/archipelago/src/api/rpc/system.rs` with handlers for: `system.stats` (CPU usage, RAM used/total, disk used/total, uptime, load average), `system.processes` (top 10 by CPU), `system.temperature` (if available). Read from `/proc/stat`, `/proc/meminfo`, `/proc/uptime`, `df`, and `/sys/class/thermal/` on Linux. Register in `core/archipelago/src/api/rpc/mod.rs` route table. **Acceptance**: `curl -X POST http://localhost:5678/rpc/v1 -d '{"method":"system.stats"}'` returns real metrics on dev server. - [ ] **BACK-02** — Add system monitoring to frontend Dashboard. In `neode-ui/src/views/Home.vue`, add a system stats section (CPU, RAM, Disk gauges) that calls `system.stats` RPC on mount and refreshes every 30s. Use `bg-white/5 rounded-lg` sub-cards inside an existing glass container. Show percentage bars with color coding (green <70%, orange 70-90%, red >90%). **Acceptance**: Dashboard shows real CPU/RAM/Disk usage. Deploy and verify.