fix: bulletproof mesh serial connection — PrivateDevices, auto-detect fallback, backoff

Root cause: systemd PrivateDevices=yes hid /dev/ttyUSB* from the service,
preventing .198 from connecting to its Heltec V3 after the security hardening.

Changes:
- Set PrivateDevices=no in systemd service (serial access needs physical devices;
  other hardening layers remain: NoNewPrivileges, ProtectSystem, RestrictNamespaces)
- Add SupplementaryGroups=dialout for explicit serial permissions
- Add fallback auto-detect when configured serial path fails to open
- Add exponential backoff on reconnect (5s→60s cap) to reduce log spam
- Add pre-open device existence check with actionable error messages
- Add udev rule (99-mesh-radio.rules) for stable /dev/mesh-radio symlink
- Add /dev/mesh-radio to serial candidate list (checked first)
- Add Connect button per detected device in Mesh UI
- Deploy udev rule to both servers and ISO build
- Fix FEDI_HASH unbound variable in deploy script
- Fix deploy binary step to handle hung service stop gracefully

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dorian 2026-03-18 10:50:13 +00:00
parent 428d11c8e2
commit 7278397209
7 changed files with 157 additions and 24 deletions

View File

@ -29,8 +29,11 @@ const SYNC_INTERVAL: Duration = Duration::from_secs(10);
/// Maximum stored messages (circular buffer).
const MAX_MESSAGES: usize = 100;
/// Delay before reconnection attempt after device disconnect.
const RECONNECT_DELAY: Duration = Duration::from_secs(10);
/// Initial delay before reconnection attempt after device disconnect.
const RECONNECT_DELAY_INIT: Duration = Duration::from_secs(5);
/// Maximum reconnect delay (cap for exponential backoff).
const RECONNECT_DELAY_MAX: Duration = Duration::from_secs(60);
/// Number of consecutive write failures before we consider the device dead
/// and trigger a reconnection cycle.
@ -150,6 +153,7 @@ pub fn spawn_mesh_listener(
tokio::spawn(async move {
let mut shutdown = shutdown;
let mut cmd_rx = cmd_rx;
let mut reconnect_delay = RECONNECT_DELAY_INIT;
loop {
if *shutdown.borrow() {
info!("Mesh listener shutting down");
@ -170,9 +174,16 @@ pub fn spawn_mesh_listener(
{
Ok(()) => {
info!("Mesh session ended cleanly");
// Session was established before ending — reset backoff
reconnect_delay = RECONNECT_DELAY_INIT;
}
Err(e) => {
error!("Mesh session error: {}", e);
// Check if session was ever connected (vs failed to open)
let was_connected = state.status.read().await.device_connected;
if was_connected {
reconnect_delay = RECONNECT_DELAY_INIT;
}
error!("Mesh session error: {} (retry in {:?})", e, reconnect_delay);
}
}
@ -184,17 +195,42 @@ pub fn spawn_mesh_listener(
}
let _ = state.event_tx.send(MeshEvent::DeviceDisconnected);
// Wait before reconnecting
// Wait before reconnecting (exponential backoff)
tokio::select! {
_ = tokio::time::sleep(RECONNECT_DELAY) => {},
_ = tokio::time::sleep(reconnect_delay) => {},
_ = shutdown.changed() => {
if *shutdown.borrow() { return; }
},
}
// Increase backoff for next failure, cap at max
reconnect_delay = (reconnect_delay * 2).min(RECONNECT_DELAY_MAX);
}
})
}
/// Scan all candidate serial ports and open the first Meshcore device found.
async fn auto_detect_and_open() -> Result<(String, MeshcoreDevice, DeviceInfo)> {
let paths = super::serial::detect_serial_devices().await;
if paths.is_empty() {
anyhow::bail!("No serial devices found in /dev");
}
for path in &paths {
debug!(path = %path, "Probing for Meshcore device");
match MeshcoreDevice::open(path).await {
Ok(mut dev) => match dev.initialize().await {
Ok(info) => {
info!(path = %path, firmware = %info.firmware_version, "Found Meshcore device via auto-detect");
return Ok((path.clone(), dev, info));
}
Err(e) => debug!(path = %path, error = %e, "Not a Meshcore device"),
},
Err(e) => debug!(path = %path, error = %e, "Could not open serial port"),
}
}
anyhow::bail!("No Meshcore device found on {} candidate ports: {:?}", paths.len(), paths)
}
/// Run a single mesh session (connect, initialize, main loop).
async fn run_mesh_session(
state: &Arc<MeshState>,
@ -206,24 +242,25 @@ async fn run_mesh_session(
shutdown: &mut tokio::sync::watch::Receiver<bool>,
cmd_rx: &mut mpsc::Receiver<MeshCommand>,
) -> Result<()> {
// Detect device
let device_path = if let Some(path) = preferred_path {
path.to_string()
// Detect device — try preferred path first, fall back to auto-detect
let (device_path, mut device, device_info) = if let Some(path) = preferred_path {
match MeshcoreDevice::open(path).await {
Ok(mut dev) => match dev.initialize().await {
Ok(info) => (path.to_string(), dev, info),
Err(e) => {
warn!("Preferred path {} handshake failed: {} — trying auto-detect", path, e);
auto_detect_and_open().await?
}
},
Err(e) => {
warn!("Preferred path {} open failed: {} — trying auto-detect", path, e);
auto_detect_and_open().await?
}
}
} else {
let paths = super::serial::detect_serial_devices().await;
if paths.is_empty() {
anyhow::bail!("No serial devices found");
}
match super::serial::probe_for_meshcore(&paths).await {
Some((path, _)) => path,
None => anyhow::bail!("No Meshcore device found on available serial ports"),
}
auto_detect_and_open().await?
};
// Open and initialize
let mut device = MeshcoreDevice::open(&device_path).await?;
let device_info = device.initialize().await?;
// Update status
{
let mut status = state.status.write().await;

View File

@ -37,8 +37,21 @@ pub struct MeshcoreDevice {
impl MeshcoreDevice {
/// Open a serial port and verify it's a Meshcore device.
pub async fn open(path: &str) -> Result<Self> {
// Check device exists before trying to open (better error message)
match tokio::fs::metadata(path).await {
Ok(meta) => {
debug!(path = %path, permissions = ?meta.permissions(), "Device node exists");
}
Err(e) => {
anyhow::bail!(
"Serial device {} not accessible: {} (check PrivateDevices in systemd, or USB connection)",
path, e
);
}
}
let port = serial2_tokio::SerialPort::open(path, BAUD_RATE)
.context(format!("Failed to open serial port {}", path))?;
.context(format!("Failed to open serial port {} (permission denied? device busy?)", path))?;
info!(path = %path, baud = BAUD_RATE, "Opened serial port");
@ -329,7 +342,9 @@ impl MeshcoreDevice {
// ─── Device detection ───────────────────────────────────────────────────
/// Candidate serial device paths to check on Linux.
/// /dev/mesh-radio is a stable udev symlink (see 99-mesh-radio.rules).
const SERIAL_CANDIDATES: &[&str] = &[
"/dev/mesh-radio",
"/dev/ttyUSB0",
"/dev/ttyUSB1",
"/dev/ttyUSB2",

View File

@ -295,6 +295,12 @@ server {
NGINXCONF
fi
# Stage the mesh-radio udev rule (stable /dev naming) into the ISO work dir when configs/ ships one
if test -f "$SCRIPT_DIR/configs/99-mesh-radio.rules"; then
    cp "$SCRIPT_DIR/configs/99-mesh-radio.rules" \
        "$WORK_DIR/99-mesh-radio.rules"
    echo " Using 99-mesh-radio.rules from configs/"
fi
# Use archipelago.service from configs/ (User=root for Podman container access)
if [ -f "$SCRIPT_DIR/configs/archipelago.service" ]; then
cp "$SCRIPT_DIR/configs/archipelago.service" "$WORK_DIR/archipelago.service"
@ -1316,6 +1322,12 @@ echo " Warning: GRUB install had issues, trying alternative..."
chroot /mnt/target update-grub
# Install udev rule for mesh radio stable naming (/dev/mesh-radio).
# install -D creates /etc/udev/rules.d when the target image lacks it and pins the
# mode to 0644 instead of inheriting whatever the ISO filesystem reports.
# The rule is optional, so a failure is reported but does not abort the install.
if [ -f /cdrom/99-mesh-radio.rules ]; then
    if install -D -m 0644 /cdrom/99-mesh-radio.rules /mnt/target/etc/udev/rules.d/99-mesh-radio.rules; then
        echo " Installed mesh radio udev rule"
    else
        echo " Warning: could not install mesh radio udev rule"
    fi
fi
# Enable services
chroot /mnt/target systemctl enable archipelago.service 2>/dev/null || true
chroot /mnt/target systemctl enable nginx.service 2>/dev/null || true

View File

@ -0,0 +1,6 @@
# Stable symlink for USB serial adapters used as mesh radios.
# Creates /dev/mesh-radio pointing to the underlying ttyUSB device.
# Supports: CP2102 (Heltec V3), CH340 (T-Beam), FTDI (RAK WisBlock).
#
# Each rule matches a tty device by the USB vendor/product ID of its serial
# bridge, sets the node to mode 0660 / group dialout, and adds the same
# "mesh-radio" symlink.
# NOTE(review): the IDs identify the bridge chip, not the radio firmware, so any
# other adapter built on the same chip will match too; and because all three
# rules add one shared symlink name, it can only point at one device when
# several matching adapters are attached. Confirm one adapter per host is the
# intended deployment.
# CP2102 (10c4:ea60)
SUBSYSTEM=="tty", ATTRS{idVendor}=="10c4", ATTRS{idProduct}=="ea60", SYMLINK+="mesh-radio", MODE="0660", GROUP="dialout"
# CH340 (1a86:7523)
SUBSYSTEM=="tty", ATTRS{idVendor}=="1a86", ATTRS{idProduct}=="7523", SYMLINK+="mesh-radio", MODE="0660", GROUP="dialout"
# FTDI (0403:6001)
SUBSYSTEM=="tty", ATTRS{idVendor}=="0403", ATTRS{idProduct}=="6001", SYMLINK+="mesh-radio", MODE="0660", GROUP="dialout"

View File

@ -23,7 +23,10 @@ ReadWritePaths=/var/lib/archipelago
# Privilege restriction
NoNewPrivileges=yes
PrivateDevices=yes
# PrivateDevices=no: serial access to /dev/ttyUSB* needed for mesh radios.
# Device access still gated by Unix permissions (dialout group) + other sandboxing.
PrivateDevices=no
SupplementaryGroups=dialout
# Network restriction (allow only IPv4/IPv6 + Unix sockets)
RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6

View File

@ -16,6 +16,7 @@ const messageText = ref('')
const sendError = ref('')
const broadcasting = ref(false)
const configuring = ref(false)
const connectingDevice = ref<string | null>(null)
const chatScrollEl = ref<HTMLElement | null>(null)
let pollInterval: ReturnType<typeof setInterval> | null = null
@ -302,6 +303,15 @@ async function handleToggleEnabled() {
} finally { configuring.value = false }
}
/**
 * Connect to one of the detected serial devices.
 *
 * Records `devicePath` in `connectingDevice` while the request is in flight (the
 * per-device Connect buttons read it for their label and disabled state), then asks
 * the mesh store to enable mesh with that path as `device_path`.
 *
 * The in-flight marker is cleared in `finally`, so it resets whether the configure
 * call resolves or rejects. A rejection is not caught here and propagates to the caller.
 *
 * @param devicePath Serial device path taken from `mesh.status.detected_devices`.
 */
async function handleConnectDevice(devicePath: string) {
connectingDevice.value = devicePath
try {
// Cast: only `enabled` and `device_path` are sent, typed as a partial MeshStatus.
await mesh.configure({ enabled: true, device_path: devicePath } as Partial<import('@/stores/mesh').MeshStatus>)
} finally {
connectingDevice.value = null
}
}
function signalBars(rssi: number | null): number {
if (rssi === null) return 0
if (rssi > -60) return 4
@ -397,6 +407,14 @@ function truncatePubkey(hex: string | null): string {
<div v-for="dev in mesh.status.detected_devices" :key="dev" class="mesh-device-row">
<div class="mesh-device-indicator" />
<span class="mesh-device-path">{{ dev }}</span>
<button
v-if="!mesh.status?.device_connected"
class="glass-button mesh-connect-btn"
:disabled="connectingDevice !== null"
@click="handleConnectDevice(dev)"
>
{{ connectingDevice === dev ? 'Connecting...' : 'Connect' }}
</button>
</div>
</div>
</div>
@ -951,6 +969,13 @@ function truncatePubkey(hex: string | null): string {
font-family: monospace;
font-size: 0.8rem;
color: rgba(255, 255, 255, 0.7);
flex: 1;
}
.mesh-connect-btn {
padding: 3px 12px;
font-size: 0.75rem;
flex-shrink: 0;
}
/* ─── Off-grid banner ─── */

View File

@ -312,6 +312,24 @@ if [ "$BOTH" = true ]; then
' 2>/dev/null || true
fi
# Deploy udev rule for mesh radio to 198.
# The remote install runs only when the upload succeeded; otherwise a stale or
# foreign /tmp/99-mesh-radio.rules could be copied into /etc/udev/rules.d as root.
# "installed" is reported only when the copy and the udev reload/trigger all succeed.
UDEV_RULE="$PROJECT_DIR/image-recipe/configs/99-mesh-radio.rules"
if [ -f "$UDEV_RULE" ]; then
    echo " Syncing udev rule to 198..."
    if scp $SSH_OPTS "$UDEV_RULE" "$TARGET_198:/tmp/99-mesh-radio.rules" 2>/dev/null; then
        ssh $SSH_OPTS "$TARGET_198" '
            if diff -q /tmp/99-mesh-radio.rules /etc/udev/rules.d/99-mesh-radio.rules >/dev/null 2>&1; then
                echo " Mesh radio udev rule unchanged"
            elif sudo cp /tmp/99-mesh-radio.rules /etc/udev/rules.d/99-mesh-radio.rules &&
                 sudo udevadm control --reload-rules &&
                 sudo udevadm trigger --subsystem-match=tty; then
                echo " Mesh radio udev rule installed"
            else
                echo " Warning: failed to install mesh radio udev rule"
            fi
            rm -f /tmp/99-mesh-radio.rules
        ' 2>/dev/null || true
    else
        # Best-effort step: report and carry on with the rest of the deploy.
        echo " Warning: could not upload mesh radio udev rule to 198"
    fi
fi
# Dev mode + FileBrowser on 198
ssh $SSH_OPTS "$TARGET_198" '
# Dev mode
@ -425,7 +443,7 @@ if [ "$LIVE" = true ]; then
echo " Skipping backend deploy (--frontend-only)"
elif ssh $SSH_OPTS "$TARGET_HOST" "[ -f $TARGET_DIR/core/target/release/archipelago ]" 2>/dev/null; then
progress "Deploying backend binary"
ssh $SSH_OPTS "$TARGET_HOST" "sudo systemctl stop archipelago"
# Give the service a bounded window to stop cleanly. Only when that fails or times
# out (hung stop) are the unit's own processes SIGKILLed through systemd, rather than
# killing by process name. The trailing `true` keeps this step from failing the deploy.
ssh $SSH_OPTS "$TARGET_HOST" 'sudo timeout 15 systemctl stop archipelago 2>/dev/null || { sudo systemctl kill --signal=SIGKILL archipelago 2>/dev/null; sleep 1; }; true'
ssh $SSH_OPTS "$TARGET_HOST" "sudo cp $TARGET_DIR/core/target/release/archipelago /usr/local/bin/"
fi
@ -511,6 +529,23 @@ if [ "$LIVE" = true ]; then
' 2>/dev/null || true
fi
# Deploy udev rule for mesh radio stable naming (/dev/mesh-radio).
# The remote install runs only when the upload succeeded; otherwise a stale or
# foreign /tmp/99-mesh-radio.rules could be copied into /etc/udev/rules.d as root.
# "installed" is reported only when the copy and the udev reload/trigger all succeed.
UDEV_RULE="$PROJECT_DIR/image-recipe/configs/99-mesh-radio.rules"
if [ -f "$UDEV_RULE" ]; then
    if scp $SSH_OPTS "$UDEV_RULE" "$TARGET_HOST:/tmp/99-mesh-radio.rules" 2>/dev/null; then
        ssh $SSH_OPTS "$TARGET_HOST" '
            if diff -q /tmp/99-mesh-radio.rules /etc/udev/rules.d/99-mesh-radio.rules >/dev/null 2>&1; then
                echo " Mesh radio udev rule unchanged"
            elif sudo cp /tmp/99-mesh-radio.rules /etc/udev/rules.d/99-mesh-radio.rules &&
                 sudo udevadm control --reload-rules &&
                 sudo udevadm trigger --subsystem-match=tty; then
                echo " Mesh radio udev rule installed"
            else
                echo " Warning: failed to install mesh radio udev rule"
            fi
            rm -f /tmp/99-mesh-radio.rules
        ' 2>/dev/null || true
    else
        # Best-effort step: report and carry on with the rest of the deploy.
        echo " Warning: could not upload mesh radio udev rule"
    fi
fi
# Deploy Claude API proxy (auto-install if missing)
progress "Setting up Claude API proxy"
ssh $SSH_OPTS "$TARGET_HOST" '
@ -782,7 +817,7 @@ MANIFEST_EOF
' 2>/dev/null)
eval "$DB_PASSWORDS"
# Fallback if hash not available
if [ -z "$FEDI_HASH" ]; then
if [ -z "${FEDI_HASH:-}" ]; then
FEDI_HASH='$2y$10$t9YjjxkiktrlYvjajB/zgOMDnSNVg4HqrbDqh47u7Jf42whNdxNqC'
fi