fix: add webhook delivery for monitoring alerts
DiskUsage and ContainerCrash alerts now fire webhooks via send_webhook() after pushing WebSocket notifications. Added data_dir parameter to spawn_metrics_collector for webhook config access.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
a227ca8c32
commit
701b202b41
@ -1,5 +1,6 @@
|
|||||||
pub mod collector;
|
pub mod collector;
|
||||||
|
|
||||||
|
use crate::webhooks::{self, WebhookEvent, WebhookPayload};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::collections::VecDeque;
|
use std::collections::VecDeque;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
@ -481,6 +482,7 @@ async fn save_alert_rules(data_dir: &std::path::Path, rules: &[AlertRule]) -> an
|
|||||||
pub fn spawn_metrics_collector(
|
pub fn spawn_metrics_collector(
|
||||||
store: Arc<MetricsStore>,
|
store: Arc<MetricsStore>,
|
||||||
state: Option<Arc<crate::state::StateManager>>,
|
state: Option<Arc<crate::state::StateManager>>,
|
||||||
|
data_dir: Option<PathBuf>,
|
||||||
) {
|
) {
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
// Wait 30s for system to stabilize after boot
|
// Wait 30s for system to stabilize after boot
|
||||||
@ -534,6 +536,31 @@ pub fn spawn_metrics_collector(
|
|||||||
state_mgr.update_data(data).await;
|
state_mgr.update_data(data).await;
|
||||||
info!("Fired {} alert(s)", alerts.len());
|
info!("Fired {} alert(s)", alerts.len());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fire-and-forget webhook delivery for mapped alert types
|
||||||
|
if let Some(ref dir) = data_dir {
|
||||||
|
for alert in &alerts {
|
||||||
|
let event = match alert.kind {
|
||||||
|
AlertRuleKind::DiskUsage => Some(WebhookEvent::DiskWarning),
|
||||||
|
AlertRuleKind::ContainerCrash => Some(WebhookEvent::ContainerCrash),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
if let Some(event) = event {
|
||||||
|
let payload = WebhookPayload {
|
||||||
|
event,
|
||||||
|
title: format!("{:?} Alert", alert.kind),
|
||||||
|
message: alert.message.clone(),
|
||||||
|
timestamp: chrono::Utc::now().to_rfc3339(),
|
||||||
|
node_id: String::new(),
|
||||||
|
details: Some(serde_json::json!({
|
||||||
|
"value": alert.value,
|
||||||
|
"threshold": alert.threshold,
|
||||||
|
})),
|
||||||
|
};
|
||||||
|
webhooks::send_webhook(dir, payload).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
|
|||||||
@ -77,7 +77,7 @@ impl Server {
|
|||||||
|
|
||||||
// Create metrics store and spawn background collector
|
// Create metrics store and spawn background collector
|
||||||
let metrics_store = Arc::new(MetricsStore::with_data_dir(config.data_dir.clone()));
|
let metrics_store = Arc::new(MetricsStore::with_data_dir(config.data_dir.clone()));
|
||||||
crate::monitoring::spawn_metrics_collector(metrics_store.clone(), Some(state_manager.clone()));
|
crate::monitoring::spawn_metrics_collector(metrics_store.clone(), Some(state_manager.clone()), Some(config.data_dir.clone()));
|
||||||
|
|
||||||
let api_handler = Arc::new(
|
let api_handler = Arc::new(
|
||||||
ApiHandler::new(config.clone(), state_manager.clone(), metrics_store).await?,
|
ApiHandler::new(config.clone(), state_manager.clone(), metrics_store).await?,
|
||||||
|
|||||||
@ -470,7 +470,7 @@
|
|||||||
|
|
||||||
- [x] **WHFIX-01** — Decouple health monitor from webhook config. In `core/archipelago/src/health_monitor.rs` lines 150-156, the health check loop skips ALL monitoring (restarts + WebSocket notifications) when webhooks are disabled or ContainerCrash isn't subscribed. This means fresh installs (webhooks disabled by default) get NO auto-restart and NO UI notifications. Fix: remove the webhook config gate from the main loop. Health checks, auto-restarts, and WebSocket `Notification` pushes must run unconditionally. Move the webhook gate into a separate block that only controls external HTTP webhook delivery — call `webhooks::send_webhook()` only when enabled AND the event is subscribed. Keep the existing `send_webhook()` function which already checks `config.enabled` and `config.events.contains()` internally. **Acceptance**: With webhooks disabled (default), crash a container (`sudo podman stop archy-filebrowser`), confirm health monitor detects it within 60s, auto-restarts it, and pushes a Notification visible in the Dashboard toast. With webhooks enabled + URL configured, confirm HTTP POST is also sent. Deploy and verify on 192.168.1.228.
|
- [x] **WHFIX-01** — Decouple health monitor from webhook config. In `core/archipelago/src/health_monitor.rs` lines 150-156, the health check loop skips ALL monitoring (restarts + WebSocket notifications) when webhooks are disabled or ContainerCrash isn't subscribed. This means fresh installs (webhooks disabled by default) get NO auto-restart and NO UI notifications. Fix: remove the webhook config gate from the main loop. Health checks, auto-restarts, and WebSocket `Notification` pushes must run unconditionally. Move the webhook gate into a separate block that only controls external HTTP webhook delivery — call `webhooks::send_webhook()` only when enabled AND the event is subscribed. Keep the existing `send_webhook()` function which already checks `config.enabled` and `config.events.contains()` internally. **Acceptance**: With webhooks disabled (default), crash a container (`sudo podman stop archy-filebrowser`), confirm health monitor detects it within 60s, auto-restarts it, and pushes a Notification visible in the Dashboard toast. With webhooks enabled + URL configured, confirm HTTP POST is also sent. Deploy and verify on 192.168.1.228.
|
||||||
|
|
||||||
- [ ] **WHFIX-02** — Add webhook integration to the monitoring module. In `core/archipelago/src/monitoring/mod.rs`, the alert system pushes `Notification` to DataModel but never calls `webhooks::send_webhook()`. Add webhook delivery for fired alerts: when a `DiskUsage` alert fires, send `WebhookEvent::DiskWarning`; when a `ContainerCrash` alert fires, send `WebhookEvent::ContainerCrash`. Map alert types to webhook events. The webhook call should be fire-and-forget (already is in `send_webhook`). **Acceptance**: Configure a webhook URL, trigger a disk warning (lower threshold temporarily to 1%), confirm HTTP POST received. Deploy and verify.
|
- [x] **WHFIX-02** — Add webhook integration to the monitoring module. In `core/archipelago/src/monitoring/mod.rs`, the alert system pushes `Notification` to DataModel but never calls `webhooks::send_webhook()`. Add webhook delivery for fired alerts: when a `DiskUsage` alert fires, send `WebhookEvent::DiskWarning`; when a `ContainerCrash` alert fires, send `WebhookEvent::ContainerCrash`. Map alert types to webhook events. The webhook call should be fire-and-forget (already is in `send_webhook`). **Acceptance**: Configure a webhook URL, trigger a disk warning (lower threshold temporarily to 1%), confirm HTTP POST received. Deploy and verify.
|
||||||
|
|
||||||
- [ ] **IDENT-01** — Auto-generate Nostr keypair during identity creation. In `core/archipelago/src/identity_manager.rs` `create()` method, after generating the Ed25519 keypair, immediately call `create_nostr_key()` on the same identity so every identity gets both Ed25519 (DID) and secp256k1 (Nostr) keys from creation. Update the `IdentityInfo` struct returned by `identity.create` and `identity.list` RPC to always include `nostr_pubkey` (hex) and `nostr_npub` (bech32) fields when present. **Acceptance**: Call `identity.create`, then `identity.get` — response includes both `did` and `nostr_npub`. Deploy and verify.
|
- [ ] **IDENT-01** — Auto-generate Nostr keypair during identity creation. In `core/archipelago/src/identity_manager.rs` `create()` method, after generating the Ed25519 keypair, immediately call `create_nostr_key()` on the same identity so every identity gets both Ed25519 (DID) and secp256k1 (Nostr) keys from creation. Update the `IdentityInfo` struct returned by `identity.create` and `identity.list` RPC to always include `nostr_pubkey` (hex) and `nostr_npub` (bech32) fields when present. **Acceptance**: Call `identity.create`, then `identity.get` — response includes both `did` and `nostr_npub`. Deploy and verify.
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user