From fbfeeeb0f5788a02cec4cc9044eea490d221b4c6 Mon Sep 17 00:00:00 2001 From: archipelago Date: Tue, 30 Jun 2026 10:39:34 -0400 Subject: [PATCH] =?UTF-8?q?fix(mesh):=20native=20E2E=20DM=20for=20archy?= =?UTF-8?q?=E2=86=94archy=20text=20+=20software=20radio-reboot?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - send_message now sends archy↔archy plain text as a native TEXT_MESSAGE_APP DM (firmware PKC-encrypts E2E), not wrapped in the binary typed envelope that silently broke archy↔archy LoRa delivery. Archy peers' Sent rows are marked encrypted so the E2E pill shows; rich typed msgs still use the typed-wire path. - Add a software radio-reboot to recover a wedged/RX-deaf radio without physical access (and for the Device-tab settings panel): driver reboot() via AdminMessage reboot_seconds=97 (verified vs meshtastic/protobufs), MeshCommand::RebootRadio, MeshService::reboot_radio, RPC mesh.reboot-radio. - Handoff doc: docs/SESSION-1.8.0-OTA-PROGRESS.md "RESUME HERE" — RF link is the proven blocker (radios not hearing each other); modem_preset mismatch is the prime suspect; on-device Meshtastic-app check + fix plan documented. Co-Authored-By: Claude Opus 4.8 (1M context) --- core/archipelago/src/api/rpc/dispatcher.rs | 1 + .../archipelago/src/api/rpc/mesh/messaging.rs | 23 +++ core/archipelago/src/mesh/listener/mod.rs | 5 + core/archipelago/src/mesh/listener/session.rs | 16 ++ core/archipelago/src/mesh/meshtastic.rs | 32 ++++ core/archipelago/src/mesh/mod.rs | 101 ++++++----- docs/PRODUCTION-MASTER-PLAN.md | 44 +++-- docs/SESSION-1.8.0-OTA-PROGRESS.md | 157 +++++++++++++++++- 8 files changed, 312 insertions(+), 67 deletions(-) diff --git a/core/archipelago/src/api/rpc/dispatcher.rs b/core/archipelago/src/api/rpc/dispatcher.rs index b7db5354..3e132fb8 100644 --- a/core/archipelago/src/api/rpc/dispatcher.rs +++ b/core/archipelago/src/api/rpc/dispatcher.rs @@ -366,6 +366,7 @@ impl RpcHandler { "mesh.send" => self.handle_mesh_send(params).await, "mesh.send-channel" => self.handle_mesh_send_channel(params).await, "mesh.broadcast" => self.handle_mesh_broadcast().await, + "mesh.reboot-radio" => self.handle_mesh_reboot_radio(params).await, "mesh.configure" => self.handle_mesh_configure(params).await, "mesh.send-invoice" => self.handle_mesh_send_invoice(params).await, "mesh.send-coordinate" => self.handle_mesh_send_coordinate(params).await, diff --git a/core/archipelago/src/api/rpc/mesh/messaging.rs b/core/archipelago/src/api/rpc/mesh/messaging.rs index 62dd9011..cccc8715 100644 --- a/core/archipelago/src/api/rpc/mesh/messaging.rs +++ b/core/archipelago/src/api/rpc/mesh/messaging.rs @@ -86,6 +86,29 @@ impl RpcHandler { Ok(serde_json::json!({ "broadcast": true })) } + /// mesh.reboot-radio — Reboot the locally-connected radio firmware to + /// recover a wedged / RX-deaf radio. Optional `seconds` delay (default 2). + pub(in crate::api::rpc) async fn handle_mesh_reboot_radio( + &self, + params: Option, + ) -> Result { + let seconds = params + .as_ref() + .and_then(|p| p.get("seconds")) + .and_then(|v| v.as_i64()) + .unwrap_or(2); + + let service = self.mesh_service.read().await; + let svc = service + .as_ref() + .ok_or_else(|| anyhow::anyhow!("Mesh service not running. Enable mesh first."))?; + + svc.reboot_radio(seconds).await?; + info!(seconds, "Mesh radio reboot requested via RPC"); + + Ok(serde_json::json!({ "reboot": true, "seconds": seconds })) + } + /// mesh.configure — Enable/disable mesh and set device path. pub(in crate::api::rpc) async fn handle_mesh_configure( &self, diff --git a/core/archipelago/src/mesh/listener/mod.rs b/core/archipelago/src/mesh/listener/mod.rs index ccc9086a..b53d19a4 100644 --- a/core/archipelago/src/mesh/listener/mod.rs +++ b/core/archipelago/src/mesh/listener/mod.rs @@ -77,6 +77,11 @@ pub enum MeshCommand { payload: Vec, }, SendAdvert, + /// Reboot the locally-connected radio firmware to recover a wedged / + /// RX-deaf radio. Meshtastic-only; meshcore ignores it. + RebootRadio { + seconds: i64, + }, /// Re-fetch contact list from the radio device. RefreshContacts, /// Delete a contact from the firmware table (clear-all / unreachable wipe). diff --git a/core/archipelago/src/mesh/listener/session.rs b/core/archipelago/src/mesh/listener/session.rs index 7bee563c..8d0009f3 100644 --- a/core/archipelago/src/mesh/listener/session.rs +++ b/core/archipelago/src/mesh/listener/session.rs @@ -96,6 +96,15 @@ impl MeshRadioDevice { } } + async fn reboot(&mut self, seconds: i64) -> Result<()> { + match self { + // Meshcore has no equivalent local-admin reboot in our driver; the + // RX-deaf recovery this targets is Meshtastic-specific. + Self::Meshcore(_) => Ok(()), + Self::Meshtastic(device) => device.reboot(seconds).await, + } + } + async fn remove_contact(&mut self, pubkey: &[u8; 32]) -> Result<()> { match self { Self::Meshcore(device) => device.remove_contact(pubkey).await, @@ -901,6 +910,13 @@ async fn handle_send_command( *consecutive_write_failures = 0; } } + MeshCommand::RebootRadio { seconds } => { + if let Err(e) = device.reboot(seconds).await { + warn!("Failed to reboot radio: {}", e); + } else { + info!(seconds, "Radio reboot command sent to device"); + } + } MeshCommand::RefreshContacts => { refresh_contacts(device, state).await; } diff --git a/core/archipelago/src/mesh/meshtastic.rs b/core/archipelago/src/mesh/meshtastic.rs index c7806fdb..f83dc1f5 100644 --- a/core/archipelago/src/mesh/meshtastic.rs +++ b/core/archipelago/src/mesh/meshtastic.rs @@ -59,6 +59,10 @@ const FROM_RADIO_MAX: usize = 4096; const ADMIN_SET_CONFIG_FIELD: u64 = 34; /// AdminMessage.set_channel oneof field number (carries a `Channel`). const ADMIN_SET_CHANNEL_FIELD: u64 = 33; +/// AdminMessage.reboot_seconds oneof field number (int32). Verified against +/// meshtastic/protobufs admin.proto: `reboot_seconds = 97` (NOT 40 — the +/// payload_variant numbers jump after the setters). +const ADMIN_REBOOT_SECONDS_FIELD: u64 = 97; /// FromRadio.channel (field 10): a `Channel` streamed during want_config. const FROM_RADIO_CHANNEL: u64 = 10; const FROM_RADIO_QUEUE_STATUS: u64 = 11; @@ -383,6 +387,34 @@ impl MeshtasticDevice { Ok(()) } + /// Reboot the locally-connected radio via `AdminMessage { reboot_seconds }` + /// on the ADMIN_APP port — the same local-admin path `set_advert_name` / + /// `set_lora_region` use (no session passkey needed over serial). The + /// firmware reboots after `seconds`, which clears a wedged / RX-deaf radio + /// (a radio that has stopped hearing the mesh while still transmitting) and + /// re-runs its LoRa init. The listener's reboot→reconnect loop reopens the + /// serial link when it comes back. + pub async fn reboot(&mut self, seconds: i64) -> Result<()> { + let Some(node_num) = self.node_num else { + anyhow::bail!("Meshtastic reboot: node_num unknown"); + }; + // AdminMessage { reboot_seconds(97): int32 }. We only ever pass a small + // positive delay, which encodes as a plain varint. + let mut admin = Vec::new(); + encode_varint_field_into(ADMIN_REBOOT_SECONDS_FIELD, seconds as u64, &mut admin); + + let packet = encode_mesh_packet(node_num, ADMIN_APP, &admin); + self.send_to_radio(&encode_to_radio_variant(TO_RADIO_PACKET, &packet)) + .await + .context("Failed to send Meshtastic reboot admin packet")?; + + info!( + node_num, + seconds, "Sent Meshtastic radio reboot (device will reboot to recover)" + ); + Ok(()) + } + /// Provision archy's two channels so the radio works like off-the-shelf /// Meshtastic AND carries our private group: /// - slot 0 (PRIMARY) = the DEFAULT public channel (name "", default key) diff --git a/core/archipelago/src/mesh/mod.rs b/core/archipelago/src/mesh/mod.rs index 0a58b3d9..b0350282 100644 --- a/core/archipelago/src/mesh/mod.rs +++ b/core/archipelago/src/mesh/mod.rs @@ -1542,52 +1542,45 @@ impl MeshService { /// MeshMessage carries a stable MessageKey — this is what makes replies /// and reactions addressable against plain text bubbles. pub async fn send_message(&self, contact_id: u32, text: &str) -> Result { - use crate::mesh::message_types::{MeshMessageType, TypedEnvelope}; let seq = self.state.next_send_seq(contact_id).await; - // Stock (non-archipelago) radio contacts — e.g. a phone running the - // MeshCore app — can't decode our typed envelope and would render it as - // garbled bytes. Send them the raw text as a plain native DM instead. - // Archipelago peers still get the typed envelope (seq/reply/reaction - // addressing + encryption). - if !self.is_archy_peer(contact_id).await { - let dest_prefix = self.peer_dest_prefix(contact_id).await?; - self.state - .send_cmd(listener::MeshCommand::SendNativeText { - dest_pubkey_prefix: dest_prefix, - payload: text.as_bytes().to_vec(), - }) - .await - .map_err(|_| anyhow::anyhow!("Mesh listener not running"))?; - return Ok(self - .record_sent_typed( - contact_id, - "text", - text, - None, - seq, - Some("lora".to_string()), - false, - ) - .await); - } - // Sign the envelope with our archipelago identity key so the receiver - // can authenticate us over LoRa (it verifies against our bound - // `arch_pubkey_hex`). This is what lets a `!ai` typed in chat to a - // trusted node pass the receiver's `trusted_only` gate over the radio — - // an unsigned radio packet can never authenticate. The signature is - // optional on the wire and ignored by peers that don't know our key, so - // it stays backward compatible. (Federation/Tor sends already sign in - // `send_typed_wire_via_federation`.) `with_seq` is applied after signing - // — seq is not covered by the signature. - let envelope = TypedEnvelope::new_signed( - MeshMessageType::Text, - text.as_bytes().to_vec(), - &self.signing_key, - ) - .with_seq(seq); - let wire = envelope.to_wire()?; - self.send_typed_wire(contact_id, wire, "text", text, None, seq) + // Plain chat text — to BOTH archy peers and stock devices — is sent as a + // native Meshtastic DM on TEXT_MESSAGE_APP. The firmware end-to-end + // (PKC / Curve25519) encrypts a directed DM whenever it knows the + // destination's public key, which archy peers exchange via NodeInfo, so + // the message is delivered E2E and surfaces as chat on every client. + // + // We deliberately do NOT wrap archy↔archy text in our binary typed + // envelope here. Meshtastic firmware 2.7.x will not deliver an opaque + // directed payload as a message: PRIVATE_APP is treated as opaque app + // data (never shown as chat), and a base64 envelope overflows a single + // LoRa frame and chunk-fails. Wrapping text was exactly what silently + // broke archy↔archy LoRa while archy→stock (plain text) kept working. + // Rich typed messages (invoice/coordinate/reaction/…) still use the + // typed-wire path via `send_typed_wire`; only plain Text goes native. + let dest_prefix = self.peer_dest_prefix(contact_id).await?; + self.state + .send_cmd(listener::MeshCommand::SendNativeText { + dest_pubkey_prefix: dest_prefix, + payload: text.as_bytes().to_vec(), + }) .await + .map_err(|_| anyhow::anyhow!("Mesh listener not running"))?; + // The firmware PKI-encrypts a directed DM to any peer whose key it knows; + // archy peers always exchange keys, so mark those Sent rows E2E so the + // pill shows immediately. (The receiver independently stamps E2E from the + // radio's `pki_encrypted` flag, so an inbound row is accurate regardless.) + let e2e = self.is_archy_peer(contact_id).await; + Ok(self + .record_sent_typed( + contact_id, + "text", + text, + None, + seq, + Some("lora".to_string()), + e2e, + ) + .await) } /// Whether `contact_id` is an archipelago peer (vs a stock meshcore client). @@ -1724,6 +1717,26 @@ impl MeshService { Ok(()) } + /// Reboot the locally-connected radio firmware to recover a wedged / + /// RX-deaf radio (one that has stopped hearing the mesh while still able to + /// transmit). The device reconnects via the listener's reboot→reconnect + /// loop. `seconds` is the firmware reboot delay. + pub async fn reboot_radio(&self, seconds: i64) -> Result<()> { + let status = self.state.status.read().await; + if !status.device_connected { + anyhow::bail!("No mesh device connected. Check USB connection."); + } + drop(status); + + self.state + .send_cmd(listener::MeshCommand::RebootRadio { seconds }) + .await + .map_err(|_| anyhow::anyhow!("Mesh listener not running"))?; + + info!(seconds, "Mesh radio reboot triggered"); + Ok(()) + } + /// Current mesh-AI assistant settings (issue #50). pub async fn assistant_config(&self) -> listener::AssistantConfig { self.state.assistant.read().await.clone() diff --git a/docs/PRODUCTION-MASTER-PLAN.md b/docs/PRODUCTION-MASTER-PLAN.md index 24d2c537..b6662e54 100644 --- a/docs/PRODUCTION-MASTER-PLAN.md +++ b/docs/PRODUCTION-MASTER-PLAN.md @@ -979,20 +979,34 @@ this match. - Reference: the existing `package-install-prune-check` dependency descriptor (dependencies.rs:208) is the seam to make data-driven. -## 10d. Mesh — Meshtastic MeshCore-parity (in the fleet binary; one open bug) (2026-06-26) +## 10d. Mesh — Meshtastic MeshCore-parity (active blocker: stock 3ccc LoRa text) (2026-06-30) -**Status: shipped as commit `8fdb45e8` and now riding in the rolled fleet binary** (built into the -#9 deploy from HEAD, sha `0060dcd6…`). The Meshtastic driver auto-provisions LoRa **region (EU_868)** -and a shared **channel "archipelago"** via the official admin API (`set_config`=field34, -`set_channel`=field33) — discovery, bidirectional RF, and **sending** are all verified on **.116 + .228**. -Detail + history: [[project_meshtastic_parity]]. +**Current deployed canary:** `.116` is running commit `b4531bb4` with backend sha +`4ab53e539d89679ef664401a9a57996267772fed02327abc2912c3e77543acbf` and frontend bundle +`index-YOAeJF7w.js` / `Mesh-BSAo88jN.js`. `main` was pushed to `gitea-vps2`. -**Open work (slot after WS-F #9–11, before/with multinode):** -- **RECEIVED-message surfacing bug** — the running driver does **not** surface received messages - (`mesh.messages` stays `[]`) even though the radio physically receives them. An instrumentation - build was in flight to locate where the inbound packet is dropped between the radio serial/BLE read - and the `mesh.messages` store. This is the one blocker to closing MeshCore parity. -- **.198 radio is bad** — won't persist config (needs a reflash) so it's not a usable mesh test node; - use .116/.228 for mesh verification. -- Definition of done: a message sent from a MeshCore/Meshtastic peer on channel "archipelago" appears - in `mesh.messages` on the receiving archipelago node, end-to-end, on ≥2 LAN nodes. +**What is fixed in this deployed canary:** +- Public stock Meshtastic interop is intentional: slot 0 PRIMARY is the public default LongFast + channel (`name=""`, default PSK); slot 1 SECONDARY is `archipelago`. +- Outgoing Meshtastic messages to stock peer `3ccc` are recorded with real 2026 timestamps and + `transport:"lora"` in RPC. The Mesh UI label maps `lora` to **LoRa**, not "Mesh". +- Post-send message refresh now polls briefly so FIPS/Tor/LoRa pills do not require a manual browser + refresh. +- Off-grid mode now blocks the mesh-chat federation fallback path as well as the generic transport + router: when enabled it forces LoRa-only sends and the UI banner reads + `Tor/FIPS disabled - LoRa only`. +- Empty mesh-chat placeholder opacity was reduced. + +**Still broken / resume here:** +- Stock Meshtastic peer `3ccc` -> `.116` LoRa text still does **not** surface in `mesh.messages`. +- Live `.116` logs prove bytes arrive from 3ccc, but the custom Meshtastic protobuf parser rejects + the packet before it becomes an inbound frame: + `Meshtastic FromRadio.packet did not parse into a decoded MeshPacket len=73 head=0dcc3c3e43153ca5b5432a16df56cbed`. +- 3ccc NodeInfo is discovered and PKC-capable: + `Meshtastic peer is PKC-capable (NodeInfo public_key) node=1128152268 key_len=32`. +- Other received packets are decoded and intentionally ignored as non-text (`portnum=3/4/5`), so + the serial reader is alive; the remaining blocker is the exact `MeshPacket` shape for stock + Meshtastic text. +- Definition of done: a new text sent from stock Meshtastic `3ccc` appears in `.116` + `mesh.messages` as an incoming LoRa message without a browser refresh, and `.116` -> `3ccc` + visibly arrives in the Meshtastic app. diff --git a/docs/SESSION-1.8.0-OTA-PROGRESS.md b/docs/SESSION-1.8.0-OTA-PROGRESS.md index 6c8b0d1b..ccb3a726 100644 --- a/docs/SESSION-1.8.0-OTA-PROGRESS.md +++ b/docs/SESSION-1.8.0-OTA-PROGRESS.md @@ -1,6 +1,128 @@ # 1.8.0 OTA Session Progress -Updated: 2026-06-29 +Updated: 2026-06-30 + +--- + +## ▶️ RESUME HERE — archy↔archy LoRa (2026-06-30 PM) — READ FIRST + +**Goal:** archy↔archy text over Meshtastic LoRa must DELIVER and show the E2E pill, +identical in off-grid and normal mode. Test bed = `.116` / `.198` / `.228` (all EU_868). +Don't touch the federation/FIPS path. + +### TL;DR of where we are +The **archy software is correct and deployed.** The blocker is now PROVEN to be at the +**radio/RF layer: the three radios are not hearing each other over the air at all.** No +amount of archy code change will fix that until the radios actually RF-link. **Resume by +testing the radios directly at home (Meshtastic phone app over Bluetooth) — see "DO THIS +FIRST AT HOME" below.** + +### What is DONE and deployed (commit pending — see below) +- **E2E send fix** (`core/archipelago/src/mesh/mod.rs` `send_message`, ~L1542): archy↔archy + plain chat text is now sent as a **native `TEXT_MESSAGE_APP` DM** (firmware PKC-encrypts + it E2E), NOT wrapped in our binary typed envelope. Archy peers' Sent rows are marked + `encrypted=true` so the pill shows. Rich typed msgs still use `send_typed_wire`. This was + the original root-cause fix (envelope-wrapped text silently broke archy↔archy LoRa). +- **NEW: software radio-reboot** end-to-end, so a wedged/RX-deaf radio can be rebooted + without physical access (and for the Device-tab settings panel the user requested): + - `meshtastic.rs`: `reboot(seconds)` driver method + `ADMIN_REBOOT_SECONDS_FIELD = 97` + (verified vs meshtastic/protobufs admin.proto — `set_owner=32/set_channel=33/set_config=34` + matched our existing constants, confirming the proto read). + - `listener/mod.rs`: `MeshCommand::RebootRadio { seconds }`. + - `listener/session.rs`: device-enum `reboot()` dispatch (Meshtastic only) + handler arm. + - `mesh/mod.rs`: `MeshService::reboot_radio(seconds)`. + - `api/rpc/mesh/messaging.rs`: `handle_mesh_reboot_radio` → RPC **`mesh.reboot-radio`** + `{seconds?}` (default 2); dispatcher arm in `api/rpc/dispatcher.rs`. + - `cargo check` passes. Built release **sha `ba4aed590027690d`** and DEPLOYED + active on + `.116/.198/.228`. The RPC works (`{"reboot":true,"seconds":2}`). + - ⚠️ **Caveat:** when called, archy logged "Sent Meshtastic radio reboot" but the radio did + **not** visibly reboot afterward (no config re-stream). Either field 97 is still off, or + newer firmware requires an admin session passkey even over local serial, or the USB serial + stayed open through the 2s reboot so no reconnect was logged. **Needs on-device verification.** + +### The hard evidence (why "nothing works") +- Directed DM tests `.198→.228` AND `.116→.228` (neither path reflashed): sender logs + `Sent plain native DM dest=30d258436d65 part=1 total=1` and RPC returns `sent:true, + encrypted:true`, but `.228` logs **nothing** — packet never reaches archy from the radio. +- A raw broadcast from `.198` (`mesh.broadcast`) was accepted by its radio but **not heard** + by `.228`/`.116`. +- In an 8-minute window, **all three nodes received 0 inbound OTA packets from any other node.** + Each only logs its OWN once-a-minute `Broadcast Meshtastic NodeInfo advert` + local TX + `field=11` queue-status. `.228 mesh.status` = `messages_received:1` total. +- `.198`'s radio is alive and transmitting NodeInfo every 60s — so it's not dead; it's that + **reception is broken on the receivers.** A radio cannot drop a broadcast AND a unicast to + its own node number while config matches, unless it simply isn't on the same airwaves. +- archy provisioning is correct & identical across nodes (read back from device): PRIMARY = + public LongFast (`name="" psk_len=1`), SECONDARY = `archipelago`, region=3 (EU_868). Admin + field constants verified. The send path hands the radio a correct unicast MeshPacket + (`to`=node, want_ack, hop_limit=3, plaintext `decoded` for the firmware to PKC-encrypt). + +### PRIME SUSPECT (software-fixable) — modem-preset / frequency mismatch +archy only ever writes `region` + `use_preset` and **never explicitly pins `modem_preset`** +(it parses region but not preset; `set_lora_region` relies on the LongFast default). If ANY +radio has a non-default modem preset / frequency slot persisted (e.g. set via the Meshtastic +app, or a different factory default after the `.198` reflash), the radios are on **different +airwaves despite identical channel name + region**, and archy would never correct it. + +### DO THIS FIRST AT HOME (decisive, ~2 min, only the user can do it) +Open the **Meshtastic phone app over Bluetooth** (works alongside archy's USB serial) on each +of `.116/.198/.228` and check: +1. Do the 3 nodes **see each other** in the node list (recent "heard")? → if NO, they're not + RF-reaching (preset/freq/antenna/range). +2. Do all 3 show the **same** Modem preset (LongFast), Region (EU_868), Frequency slot, and + the same PRIMARY channel? → any difference = the cause. +This single test separates "archy misconfigures the radios" from "radios physically can't +reach each other." + +### THEN — the archy fix to apply (if preset/config differs) +Make archy **authoritatively write the full LoRaConfig** and force re-provision so all radios +converge: in `core/archipelago/src/mesh/meshtastic.rs::set_lora_region` (and its +caller/guard `ensure_lora_region` ~L304), explicitly set `modem_preset = LONG_FAST (0)` as a +field in the LoRaConfig (it's currently omitted/defaulted), and make the startup provision +path rewrite LoRa config when the preset doesn't match, then reboot the radio (use the new +`mesh.reboot-radio`). Also verify the `mesh.reboot-radio` actually reboots the radio +on-device (the caveat above). + +### TEST RECIPE (works on each node) +- RPC helper used this session: a node-side `rpc.sh` that logs in (password + `ThisIsWeb54321@`), grabs the `csrf_token` cookie, echoes it as `X-CSRF-Token`, and POSTs to + `http://127.0.0.1:5678/rpc/v1`. Recreate it or run archy's RPC directly. Methods: + `mesh.peers`, `mesh.status`, `mesh.messages`, `mesh.send {contact_id,message}`, + `mesh.broadcast`, `mesh.reboot-radio {seconds}`. +- **LoRa contact ids:** `.116=1135977788` (prefix `3ca5b543`), `.198=3677050140` (`db2b551c`), + `.228=1129894448` (prefix `30d25843`), stock `3ccc=1128152268`. +- **Link health check (run on each node):** look for inbound `from=Some("!...")` lines in + `journalctl -u archipelago` that are NOT the node's own `Broadcast ... NodeInfo advert`. If + zero across all nodes → RF link is down (the current state). +- **E2E success criteria:** send `.198→.228`, the marker appears in `.228` `mesh.messages` as + an inbound row with `encrypted:true` / `transport:"lora"`, AND `.116↔.228` likewise. + +### DEPLOY / BUILD RECIPE +- Build: from `core/`, `CARGO_TARGET_DIR=/tmp/archy-hotfix-target CARGO_INCREMENTAL=0 cargo + build --release -p archipelago --bin archipelago`. (If `rust-lld: undefined hidden symbol`, + it's incremental cache — `CARGO_INCREMENTAL=0` fixes it.) +- SSH key `~/.ssh/archipelago-deploy` is authorized on `.116/.198/.228`. SSH/UI/RPC password + `ThisIsWeb54321@`. Per node: scp the binary, `sudo systemctl stop archipelago` → + `kill -9 $(pgrep -x archipelago)` → `install -m0755` to `/usr/local/bin/archipelago` → + `systemctl start archipelago`. Verify by `sha256sum` match + `systemctl is-active`. +- **Current deployed sha on all 3 = `ba4aed590027690d`** (the reboot-enabled build). + +### Fleet state (as of 2026-06-30 PM) +- All 3 nodes on binary `ba4aed59`, active. Off-grid mode currently OFF (`mesh_only:false`). +- `.198` radio was reflashed to factory `firmware-heltec-v3-2.7.26` (recovered from corrupt + NVS); region EU_868 persists. Its archy identity is NOT re-bound on `.228` (`.228` shows + `.198` as raw radio "Meshtastic 551c", `arch_pubkey_hex` absent) because `.228` hasn't heard + `.198`'s identity broadcast — a downstream symptom of the dead RF link, not a separate bug. +- The radios are powered & each transmitting; they are simply not hearing each other. + +### Deferred UI (after LoRa works) +- Device-tab **settings panel** (gear/desktop) — host the "Reboot radio" button there; calls + `mesh.reboot-radio`. Scoping done: add to the Mesh.vue actions row (mirrors Broadcast/Off-Grid + buttons) + a `rebootRadio()` method in `neode-ui/src/stores/mesh.ts`. See `Mesh.vue` ~L1484 + actions row and `mesh.ts` ~L373 `broadcastIdentity()` pattern. +- Device-onboarding modal (detect plugged-in radio). + +--- Current scope: - Preserve existing mesh work: E2E indicators, FIPS/Tor transport indicators, typed-message paths, Meshtastic region/channel provisioning, and dirty Meshtastic receive-attempt changes. @@ -35,11 +157,30 @@ Do not discard: - That would make live `3ccc` packets look older than 10 minutes and get dropped before `mesh.messages`. - Current patch treats implausibly early `rx_time` values as unknown rather than stale. -.116 live validation: +.116 live validation after 2026-06-30 hotfix: - `.116` reachable by SSH; `archipelago` active; `/dev/mesh-radio -> ttyUSB0` attached. -- Recent logs show repeated `FromRadio.queueStatus` frames (`field 11`, bytes like `5a04100e1810`) being rejected by the serial frame prevalidator as invalid payloads. -- Current patch accepts `FromRadio.queueStatus` as a valid ignored frame so non-message status frames no longer look like corrupt serial data. -- Focused Meshtastic tests: green, 7/7. -- Updated patch deployed to `.116` as binary sha `028ec6ff9a60ca8970c081987457d78ed1c517cd81f7089f51b9a01745b5c3c4`. -- After redeploy, logs show `FromRadio field=11` accepted and no new `Dropping stale ... !433e3ccc` entries in the checked post-deploy window. -- There are stale other-agent shell watcher processes on `.116` referencing `RXDIAG`; leave alone unless they interfere. +- Current canary deploy is commit `b4531bb4`; backend sha + `4ab53e539d89679ef664401a9a57996267772fed02327abc2912c3e77543acbf`; frontend bundle + `index-YOAeJF7w.js` / `Mesh-BSAo88jN.js`. +- `main` pushed to `gitea-vps2`. +- RPC on `.116`: + - `transport.status` currently reports `mesh_only:false` (off-grid mode is not enabled unless + the user toggles it). + - `mesh.status` reports Meshtastic connected: `device_type:"meshtastic"`, + `self_node_id:1135977788`, `peer_count:13`. + - Recent `.116` -> `3ccc` sent rows are stored with real 2026 timestamps and `transport:"lora"`. +- UI/backend fixes included in `b4531bb4`: + - `transportLabel("lora")` displays **LoRa**. + - mesh sends refetch messages after send so transport pills settle without browser refresh. + - off-grid mode blocks the mesh-chat FIPS/Tor federation fallback and forces LoRa-only sends; + banner text is `Tor/FIPS disabled - LoRa only`. + - empty mesh-chat placeholder opacity reduced. +- Meshtastic diagnostics now identify the remaining blocker: + - 3ccc NodeInfo is discovered: + `Meshtastic peer is PKC-capable (NodeInfo public_key) node=1128152268 key_len=32`. + - Bytes from stock Meshtastic text reach `.116`, but the custom parser rejects the packet: + `Meshtastic FromRadio.packet did not parse into a decoded MeshPacket len=73 head=0dcc3c3e43153ca5b5432a16df56cbed`. + - Non-text packets decode and are ignored with port numbers (`portnum=3/4/5`), so the serial + read path is alive. Resume inside `core/archipelago/src/mesh/meshtastic.rs::parse_mesh_packet`. +- LoRa is therefore **not fully fixed** yet: stock `3ccc` -> `.116` text does not surface in + `mesh.messages`, and `.116` -> `3ccc` still needs user-visible confirmation in the Meshtastic app.