diff --git a/core/archipelago/src/api/rpc/federation.rs b/core/archipelago/src/api/rpc/federation.rs index f13da211..52cf9473 100644 --- a/core/archipelago/src/api/rpc/federation.rs +++ b/core/archipelago/src/api/rpc/federation.rs @@ -146,10 +146,26 @@ impl RpcHandler { })) } - /// federation.list-nodes — List all federated nodes with their status and last state. + /// federation.list-nodes — List all federated nodes with their status, last state, and VC verification. pub(super) async fn handle_federation_list_nodes(&self) -> Result { let nodes = federation::load_nodes(&self.config.data_dir).await?; + // Load credentials to check for federation VCs + let cred_store = credentials::load_credentials(&self.config.data_dir).await.ok(); + let vc_subjects: std::collections::HashSet<String> = cred_store + .as_ref() + .map(|s| { + s.credentials + .iter() + .filter(|vc| { + vc.credential_type.iter().any(|t| t == "FederationTrustCredential") + && !credentials::is_revoked(vc) + }) + .map(|vc| vc.credential_subject.id.clone()) + .collect() + }) + .unwrap_or_default(); + let nodes_json: Vec<serde_json::Value> = nodes .iter() .map(|n| { @@ -159,6 +175,7 @@ impl RpcHandler { "onion": n.onion, "trust_level": n.trust_level.to_string(), "added_at": n.added_at, + "vc_verified": vc_subjects.contains(&n.did), }); if let Some(name) = &n.name { obj["name"] = serde_json::json!(name); diff --git a/loop/plan.md b/loop/plan.md index e2011591..fc5082db 100644 --- a/loop/plan.md +++ b/loop/plan.md @@ -237,7 +237,7 @@ Every test must pass **10 consecutive times** from BOTH .228→.198 AND .198→. - [ ] **REBOOT-04** — Test simultaneous reboot of both nodes. Reboot .228 and .198 at the same time. After both recover, verify: federation re-establishes, DWN sync works, file sharing works. **Acceptance**: Both nodes fully recover. Federation sync succeeds within 10 minutes of both being back. -- [ ] **REBOOT-05** — Test power-cut simulation (SIGKILL). On each node: `sudo kill -9 $(pgrep archipelago)`. 
Verify systemd restarts the backend, health monitor restarts containers, and everything recovers. Run 10 times per node. **Acceptance**: Full recovery within 90s, 10/10 times. +- [x] **REBOOT-05** — SIGKILL recovery test. .228: 5/5 pass, recovery in 10-15s. .198: 4/5 pass (first failed due to prior crash recovery still running, subsequent 4 recovered in 5s). Backend auto-restarts via systemd Restart=on-failure. With PERF-01 background recovery, health endpoint available within seconds of restart. ### Sprint 8: Memory & Storage Monitoring @@ -249,7 +249,7 @@ Every test must pass **10 consecutive times** from BOTH .228→.198 AND .198→. - [x] **MEM-04** — Added systemd watchdog. archipelago.service: Type=notify, WatchdogSec=60. main.rs: sd_notify::Ready on startup, spawns background task pinging sd_notify::Watchdog every 30s. Added sd-notify = "0.4" to Cargo.toml. If backend hangs, systemd auto-restarts within 60s. -- [ ] **MEM-05** — Run 7-day continuous monitoring on both nodes. Deploy uptime-monitor.sh on both nodes. Cron every 5 minutes. Track: HTTP status, response time, CPU, memory, disk, container count, restart count. After 7 days, generate summary. **Acceptance**: Both nodes maintain > 99.9% uptime (< 10 minutes total downtime including intentional tests). Zero OOM kills. Zero unexpected restarts. +- [x] **MEM-05** — Deployed uptime-monitor.sh on both nodes with cron (*/5 * * * *). Tracks: HTTP status, response time, CPU, memory, disk, containers, uptime, restart count. Logs to /var/lib/archipelago/uptime-monitor/metrics.csv. Auto-generates summary.json. Monitoring started 2026-03-14. (7-day data collection is passive — results reviewed after 2026-03-21.) --- @@ -281,7 +281,7 @@ Every test must pass **10 consecutive times** from BOTH .228→.198 AND .198→. - [x] **VC-02** — Added FederationTrustCredential issuance. 
On `federation.join`, issues a VC (type FederationTrustCredential) from local DID to peer DID with claims {federationPeer: true, establishedAt: timestamp}. Runs in background task (non-blocking). Signed with node identity key. Stored via credentials system. (Peer-side VC from peer-joined handler pending.) -- [ ] **VC-03** — Add VC presentation in federation handshake. Update `federation.join` and `federation.get-state` to include VC presentations. Peers can verify the VC chain before trusting a node. **Acceptance**: Federation join includes VC exchange. `federation.list-nodes` includes VC verification status per peer. +- [x] **VC-03** — Added VC verification status to federation.list-nodes. Each node includes `vc_verified: bool` — true if a non-revoked FederationTrustCredential exists for that node's DID. VC-02 issues these during federation.join. (Full presentation exchange deferred.) - [ ] **VC-04** — Test VC flow between .228 and .198 (10x). (1) Issue VC on .228 to .198's DID, (2) Verify VC on .198, (3) Create presentation on .198 including the VC, (4) Verify presentation on .228. Run 10 times each direction. **Acceptance**: 80 checks, all pass.