fix(mesh/reticulum): kill the whole daemon process group on drop

The reticulum daemon is a PyInstaller one-file binary: a bootloader parent
that forks the real Python process. `kill_on_drop`/`start_kill()` only SIGKILL
the bootloader, orphaning the forked child — which keeps holding the RNode
serial port. Across the listener's 30-min RX-stall reconnects these orphaned
daemons piled up (9 concurrent instances observed on a live node), all
clutching /dev/ttyUSB0 and garbling the RNode until it stopped transmitting
entirely.

Spawn the daemon as its own process-group leader (`process_group(0)`) and, on
drop, signal the whole group (SIGTERM for a clean RNode/socket release, then
SIGKILL as a hard backstop) so the forked child can never be orphaned.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dorian 2026-07-01 21:29:54 +01:00
parent 81444ab4a8
commit be50c886bb
3 changed files with 22 additions and 3 deletions

1
core/Cargo.lock generated
View File

@ -128,6 +128,7 @@ dependencies = [
"hyper-ws-listener",
"iroh",
"iroh-blobs",
"libc",
"mainline",
"mdns-sd",
"nostr-sdk",

View File

@ -22,6 +22,7 @@ iroh-swarm = ["dep:iroh", "dep:iroh-blobs"]
[dependencies]
# Core dependencies
tokio = { version = "1", features = ["full"] }
libc = "0.2" # process-group signalling for the supervised reticulum daemon
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
anyhow = "1.0"

View File

@ -104,6 +104,11 @@ fn daemon_command(
.arg(x);
}
cmd.kill_on_drop(true)
// Run the daemon as its own process-group leader. The packaged binary is
// a PyInstaller one-file bootloader that forks the real Python process;
// making it a group leader lets Drop signal the WHOLE group so the
// forked child can't be orphaned and keep holding the serial port.
.process_group(0)
.stdin(std::process::Stdio::null())
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped());
@ -945,9 +950,21 @@ fn contains_detect_resp(buf: &[u8]) -> bool {
impl Drop for ReticulumLink {
/// Tears down the supervised daemon and removes its control socket file.
///
/// The packaged daemon is a PyInstaller one-file bootloader that forks the
/// real Python process into the process group it was spawned as leader of.
/// SIGKILLing only the bootloader (what `start_kill`/`kill_on_drop` do)
/// orphans that child, which keeps holding the serial port. So the whole
/// group is signalled instead: SIGTERM first, a short bounded wait so the
/// daemon can actually act on it, then SIGKILL for whatever is left.
///
/// NOTE(review): this blocks the dropping thread for at most `TERM_GRACE`.
/// Sending SIGKILL immediately after SIGTERM (the previous behaviour) never
/// blocks, but it also never lets a SIGTERM handler run, so the "clean
/// release" path was dead code. Tune or move the wait off-thread if the
/// stall matters for the caller.
fn drop(&mut self) {
    // Upper bound on how long the group gets to exit after SIGTERM, and how
    // often we re-check. Both are judgement calls, not measured values.
    const TERM_GRACE: std::time::Duration = std::time::Duration::from_millis(500);
    const POLL_INTERVAL: std::time::Duration = std::time::Duration::from_millis(20);

    // `kill(2)` treats a target of 0 as "my own process group" and -1 as
    // "every process I may signal", so refuse anything that would negate to
    // one of those (or that does not fit in a pid_t) instead of casting
    // blindly with `as`.
    let group_leader = self
        .child
        .id()
        .and_then(|pid| i32::try_from(pid).ok())
        .filter(|&pid| pid > 1);

    if let Some(pgid) = group_leader {
        // SAFETY: `kill` only takes two integers and touches none of our
        // memory. `-pgid` is strictly below -1, so it addresses exactly the
        // process group whose id is `pgid`.
        unsafe {
            libc::kill(-pgid, libc::SIGTERM);
        }

        let deadline = std::time::Instant::now() + TERM_GRACE;
        let group_exited = loop {
            // Reap the bootloader if it has already exited: an exited but
            // unreaped process still counts as a group member and would
            // make the probe below succeed until the deadline.
            let _ = self.child.try_wait();

            // SAFETY: same as above; signal 0 performs the existence and
            // permission check without delivering anything.
            let probe = unsafe { libc::kill(-pgid, 0) };
            if probe != 0 {
                // No member left that we could signal; nothing to escalate.
                break true;
            }
            if std::time::Instant::now() >= deadline {
                break false;
            }
            std::thread::sleep(POLL_INTERVAL);
        };

        if !group_exited {
            // Hard backstop: a wedged daemon must not survive and keep the
            // serial port.
            // SAFETY: same as above.
            unsafe {
                libc::kill(-pgid, libc::SIGKILL);
            }
        }
    }

    // Kept from the original teardown; errors (e.g. the child is already
    // gone) are deliberately ignored because this is best-effort cleanup.
    let _ = self.child.start_kill();
    let _ = std::fs::remove_file(&self.socket_path);
}