feat(trust): pin release-root anchor + ship signed app-catalog
Pin RELEASE_ROOT_PUBKEY_HEX from the 2026-07-02 release-root signing ceremony
(signer did:key:z6MkkidEnEpo6qHMCNSZoNKWtvQvxq3whnaME9wGgEFhq7ur) so nodes verify
the publisher identity of the app-catalog. Sign releases/app-catalog.json in place.
Fix two floats that made the catalog unsignable: archy-btcpay-db manifest version
-> string, fedimint-clientd cpu_limit 0.25 -> 1 (u32). Add scripts/sign-catalog.sh
helper, the 1.8.0 release-hardening plan/tracker, and the commit-and-push project
rule in CLAUDE.md.
Backward-compatible: old binaries still accept the signed catalog; the pinned-anchor
binary ships in the next build/OTA.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
8b6485078a
commit
1977bdefb5
22
CLAUDE.md
22
CLAUDE.md
@ -26,6 +26,28 @@ Detailed sub-plans (all linked from the master):
|
|||||||
- Current per-app state → `docs/app-registry-status-2026-06-21.md`
|
- Current per-app state → `docs/app-registry-status-2026-06-21.md`
|
||||||
- Production test gate (exit criterion) → `tests/lifecycle/TESTING.md`
|
- Production test gate (exit criterion) → `tests/lifecycle/TESTING.md`
|
||||||
|
|
||||||
|
## Commit & push every unit of work (never violate)
|
||||||
|
|
||||||
|
**The #1 process rule: work is not "done" until it is committed AND pushed.** This
|
||||||
|
exists because finished work has been lost/clobbered by sitting uncommitted in the
|
||||||
|
shared tree across agents and sessions. To prevent that:
|
||||||
|
|
||||||
|
- **Commit each feature/fix the moment it works** — one focused, self-contained
|
||||||
|
commit per logical change (it compiles and its targeted tests pass). Do not let
|
||||||
|
unrelated changes accumulate uncommitted.
|
||||||
|
- **Push immediately after committing** so nothing lives only on one machine. `main`
|
||||||
|
is protected → push via `git push gitea-ai main` (account `ai`, see the memory
|
||||||
|
note); feature branches push to their own remote.
|
||||||
|
- **Never leave a stack of finished work uncommitted** overnight or when handing off
|
||||||
|
between agents — if you must pause mid-change, commit a clearly-labelled WIP
|
||||||
|
checkpoint rather than leaving it dirty.
|
||||||
|
- **Stage explicitly by path** (`git add <paths>`) when another agent's uncommitted
|
||||||
|
work shares the tree — never `git add -A` / `git commit -a`, which clobbers or
|
||||||
|
entangles their changes.
|
||||||
|
- **Never commit or push secrets** (mnemonics, private keys, API tokens). Signing is
|
||||||
|
done offline; artifacts (catalog/manifest) are signed, not the keys.
|
||||||
|
- Commit messages end with the `Co-Authored-By: Claude …` trailer.
|
||||||
|
|
||||||
## Invariants (never violate)
|
## Invariants (never violate)
|
||||||
|
|
||||||
- **Rootless Podman only.** No rootful, no Docker-socket mounts, no privileged
|
- **Rootless Podman only.** No rootful, no Docker-socket mounts, no privileged
|
||||||
|
|||||||
@ -16,9 +16,11 @@ use ed25519_dalek::VerifyingKey;
|
|||||||
|
|
||||||
/// Hex of the pinned Ed25519 release-root public key (32 bytes / 64 hex chars).
|
/// Hex of the pinned Ed25519 release-root public key (32 bytes / 64 hex chars).
|
||||||
///
|
///
|
||||||
/// Pinned 2026-07-01 (signer did:key:z6MkkidEnEpo6qHMCNSZoNKWtvQvxq3whnaME9wGgEFhq7ur).
|
/// Pinned 2026-07-02 from the release-root signing ceremony
|
||||||
/// The corresponding mnemonic is held offline by the publisher — see
|
/// (signer did:key:z6MkkidEnEpo6qHMCNSZoNKWtvQvxq3whnaME9wGgEFhq7ur). The
|
||||||
/// `docs/workstream-b-signing-runbook.md` for the ceremony that produced this.
|
/// corresponding mnemonic is held offline by the publisher — see
|
||||||
|
/// `docs/workstream-b-signing-runbook.md`. Regenerate/verify with:
|
||||||
|
/// `RELEASE_MASTER_MNEMONIC=… archipelago ceremony pubkey`.
|
||||||
pub const RELEASE_ROOT_PUBKEY_HEX: Option<&str> =
|
pub const RELEASE_ROOT_PUBKEY_HEX: Option<&str> =
|
||||||
Some("5d15cbee8a108f7dd288c02d29a1d9d71f198acc99186aad8008b4f28d469951");
|
Some("5d15cbee8a108f7dd288c02d29a1d9d71f198acc99186aad8008b4f28d469951");
|
||||||
|
|
||||||
@ -53,9 +55,14 @@ mod tests {
|
|||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn unset_constant_is_none() {
|
fn pinned_constant_parses_to_a_valid_key() {
|
||||||
// Default build ships no pinned anchor yet.
|
// The release-root anchor is pinned (ceremony 2026-07-02); it must be
|
||||||
assert!(RELEASE_ROOT_PUBKEY_HEX.is_none());
|
// present and a well-formed 32-byte Ed25519 key.
|
||||||
|
let hex = RELEASE_ROOT_PUBKEY_HEX.expect("release-root anchor must be pinned");
|
||||||
|
assert!(
|
||||||
|
parse_pubkey_hex(hex).is_some(),
|
||||||
|
"pinned RELEASE_ROOT_PUBKEY_HEX is not a valid Ed25519 key"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|||||||
@ -149,6 +149,18 @@ mod tests {
|
|||||||
doc
|
doc
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Pin `test_key` as the release-root anchor for this process via the env
|
||||||
|
/// override. Needed because the baked-in `RELEASE_ROOT_PUBKEY_HEX` is the
|
||||||
|
/// real ceremony key, which no unit test can produce signatures for — so to
|
||||||
|
/// exercise the anchored-verification path we pin a key we can sign with.
|
||||||
|
/// Every call sets the same value, so parallel tests stay consistent.
|
||||||
|
fn pin_test_key_as_anchor() {
|
||||||
|
std::env::set_var(
|
||||||
|
"ARCHY_RELEASE_ROOT_PUBKEY",
|
||||||
|
hex::encode(test_key().verifying_key().to_bytes()),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn unsigned_document_reports_unsigned() {
|
fn unsigned_document_reports_unsigned() {
|
||||||
let doc = json!({"schema": 1, "apps": {}});
|
let doc = json!({"schema": 1, "apps": {}});
|
||||||
@ -156,18 +168,31 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn roundtrip_verifies() {
|
fn roundtrip_verifies_and_anchors_to_pinned_key() {
|
||||||
|
// With the anchor pinned to the signer, verification succeeds AND
|
||||||
|
// reports anchored == true (signer identity confirmed).
|
||||||
|
pin_test_key_as_anchor();
|
||||||
let signed = sign_into(&test_key(), json!({"schema": 1, "n": 42}));
|
let signed = sign_into(&test_key(), json!({"schema": 1, "n": 42}));
|
||||||
match verify_detached(&signed).unwrap() {
|
match verify_detached(&signed).unwrap() {
|
||||||
// No anchor pinned in the default test build → anchored == false.
|
SignatureStatus::Verified { anchored, .. } => assert!(anchored),
|
||||||
SignatureStatus::Verified { anchored, .. } => assert!(!anchored),
|
|
||||||
other => panic!("expected Verified, got {:?}", other),
|
other => panic!("expected Verified, got {:?}", other),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn signature_from_non_anchor_key_is_rejected() {
    // The anchor is pinned to the test key, so a document carrying a valid,
    // self-consistent signature from any OTHER key must be refused outright.
    // This is the guard against a mirror substituting its own signing key.
    pin_test_key_as_anchor();
    let impostor_key = SigningKey::from_bytes(&[11u8; 32]);
    let payload = json!({"schema": 1, "n": 42});
    let forged = sign_into(&impostor_key, payload);
    let outcome = verify_detached(&forged);
    assert!(outcome.is_err());
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn signature_survives_key_reordering() {
|
fn signature_survives_key_reordering() {
|
||||||
// Re-emitting the document with shuffled keys must not break the sig.
|
// Re-emitting the document with shuffled keys must not break the sig.
|
||||||
|
pin_test_key_as_anchor();
|
||||||
let signed = sign_into(&test_key(), json!({"b": 2, "a": 1}));
|
let signed = sign_into(&test_key(), json!({"b": 2, "a": 1}));
|
||||||
let reparsed: Value =
|
let reparsed: Value =
|
||||||
serde_json::from_str(&serde_json::to_string(&signed).unwrap()).unwrap();
|
serde_json::from_str(&serde_json::to_string(&signed).unwrap()).unwrap();
|
||||||
@ -179,6 +204,9 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn tampered_payload_is_rejected() {
|
fn tampered_payload_is_rejected() {
|
||||||
|
// Pin the signer so verification reaches the signature check (not an
|
||||||
|
// anchor-identity short-circuit), proving tamper detection itself.
|
||||||
|
pin_test_key_as_anchor();
|
||||||
let mut signed = sign_into(&test_key(), json!({"schema": 1, "n": 42}));
|
let mut signed = sign_into(&test_key(), json!({"schema": 1, "n": 42}));
|
||||||
signed
|
signed
|
||||||
.as_object_mut()
|
.as_object_mut()
|
||||||
|
|||||||
301
docs/1.8.0-RELEASE-HARDENING-PLAN.md
Normal file
301
docs/1.8.0-RELEASE-HARDENING-PLAN.md
Normal file
@ -0,0 +1,301 @@
|
|||||||
|
# Archipelago 1.8.0 — Release Hardening Plan & Tracker
|
||||||
|
|
||||||
|
> **The one living checklist for shipping 1.8.0.** Derived from a full-system deep
|
||||||
|
> audit (2026-07-02): backend security, backend code-quality, frontend, mesh,
|
||||||
|
> tests/release pipeline, and the ISO build. Supersedes nothing — it *sits above*
|
||||||
|
> `docs/UNIFIED-TASK-TRACKER.md` (day-to-day) as the release exit-criteria list.
|
||||||
|
> **Keep it updated: tick a box the moment an item lands, with the commit sha.**
|
||||||
|
|
||||||
|
**Definition of done for 1.8.0:** the supply chain is authenticated end-to-end
|
||||||
|
(§A), OTA self-update is safe and rollback-proven on real hardware (§B), no
|
||||||
|
secrets ship in the image (§F), and the single-node gate stays 5/5 green through
|
||||||
|
all of it. Everything else is polish that should not block the tag.
|
||||||
|
|
||||||
|
**Legend:** `[ ]` open · `[~]` in progress · `[x]` done · 🔴 critical · 🟠 high ·
|
||||||
|
🟡 medium · 🟢 low/polish · ⛔ blocked on you.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🎯 The single most important insight
|
||||||
|
|
||||||
|
The **release signing ceremony (Workstream B) is the linchpin.** ✅ The ceremony
KEY was generated (user confirmed 2026-07-02) — the hard offline part is done, and
the first outputs are **now wired into the repo** (see §A): `anchor.rs` pins
`RELEASE_ROOT_PUBKEY_HEX` and `releases/app-catalog.json` carries `signature`/`signed_by`
(its `image_signature` fields are literal `"cosign://..."` placeholders). Of the three
mechanical steps, split by who can run them: **(1)** pin the pubkey — needs only the
*public* hex, done in-repo ✅; **(2)** sign the catalog with the `RELEASE_MASTER_MNEMONIC` —
only the publisher, secret never touches a host ✅; **(3)** implement + flip cosign
enforcement on the pull path — still open. Until the pinned-anchor binary ships and (3)
lands, every "verify the signature" task below is written but not enforced. **This is
still the critical path; §A converges on it.**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## §A — Supply-chain authentication (🔴 THE release blocker)
|
||||||
|
|
||||||
|
Today an attacker who controls the mirror IP (or any MITM on the plaintext HTTP
|
||||||
|
path) can ship an arbitrary root binary, arbitrary container images, and an
|
||||||
|
arbitrary app catalog to the entire fleet — fully unattended under
|
||||||
|
`auto_apply`. These four items are one story and must land together.
|
||||||
|
|
||||||
|
- [x] 🔴 **Pin `RELEASE_ROOT_PUBKEY_HEX` + sign the catalog** — DONE 2026-07-02.
|
||||||
|
`anchor.rs` pinned to `5d15cbee…d469951` (signer
|
||||||
|
`did:key:z6MkkidEnEpo6qHMCNSZoNKWtvQvxq3whnaME9wGgEFhq7ur`); trust tests updated (16/16
|
||||||
|
green). `releases/app-catalog.json` signed in place (`signed_by` matches, 64-byte sig);
|
||||||
|
two blocking floats fixed en route (`archy-btcpay-db` version→string, `cpu_limit` 0.25→1).
|
||||||
|
Ship order (backward-compatible): signed catalog goes out first (old binaries still accept
|
||||||
|
it), pinned-anchor binary follows in the next build/OTA. **Still ahead:** (a) the
|
||||||
|
pinned-anchor binary must actually be built + shipped for enforcement to be live on nodes;
|
||||||
|
(b) flip "accept unsigned" → "reject unsigned" only after the whole fleet is on the pinned
|
||||||
|
binary (`container/app_catalog.rs:397`, the `Unsigned` arm) — see the next item.
|
||||||
|
- [ ] 🔴 **Enforce a signature on the OTA manifest before trusting it.**
|
||||||
|
`update.rs:68` fetches `http://146.59.87.168:3000/.../manifest.json` over cleartext
|
||||||
|
and parses/trusts it with no `trust::verify_detached` call; component sha256/blake3
|
||||||
|
are only checked against that same unauthenticated manifest → remote root RCE.
|
||||||
|
Move to HTTPS + pinned cert, require an Ed25519 release-root signature, and
|
||||||
|
**refuse `auto_apply` until the anchor is pinned.**
|
||||||
|
- [ ] 🔴 **Implement container image signature verification (cosign).**
|
||||||
|
`container/src/podman_client.rs:255` — `pull_image(.., _signature)` silently discards
|
||||||
|
the signature that the manifest threads all the way down
|
||||||
|
(`prod_orchestrator.rs:1978/2435`). Wire `sigstore-rs`/`cosign verify` (or
|
||||||
|
`podman pull --signature-policy`); hard-fail when a declared signature doesn't verify.
|
||||||
|
- [ ] 🟠 **Move the image mirror to HTTPS; drop `--tls-verify=false`.**
|
||||||
|
`podman_client.rs:641` `INSECURE_REGISTRY_HOSTS = ["146.59.87.168:3000"]` +
|
||||||
|
`config.rs:104,124` allowlist pull images over unauthenticated HTTP. Remove the raw-IP
|
||||||
|
entries; give the mirror a valid/pinned cert. (Same host also baked insecurely into
|
||||||
|
the ISO — see §F.)
|
||||||
|
- [ ] 🟠 **Validate every image string at the pull site, not just the RPC boundary.**
|
||||||
|
`is_valid_docker_image` runs in `install.rs:224`/`runtime.rs:549` but
|
||||||
|
`prod_orchestrator::install_fresh` (1978) and `resolve_catalog_image` (944-971) pass
|
||||||
|
catalog/manifest images straight to `pull_image`. Call the validator right before
|
||||||
|
every pull.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## §B — OTA self-update safety (🔴 1.8.0's headline feature is untested live)
|
||||||
|
|
||||||
|
The apply path itself is well-built (resumable download, staged-complete marker,
|
||||||
|
atomic swap, single-depth backup). The gaps are **authenticity** (§A) and
|
||||||
|
**verification depth** — plus the fact that the upgrade path has never run
|
||||||
|
end-to-end on real hardware.
|
||||||
|
|
||||||
|
- [ ] 🔴 **Deepen the post-OTA health check.** `update.rs:456` (`probe_frontend_once`)
|
||||||
|
passes on any 2xx/3xx from `GET /`, and `verify_pending_update` (494-593) only rolls
|
||||||
|
back on that. A release with a broken RPC API, dead containers, or failed LND unlock
|
||||||
|
passes and never rolls back. Add `/rpc/v1 update.status` + container-list/required-stack
|
||||||
|
health assertions before clearing the pending-verify marker.
|
||||||
|
- [ ] 🟠 **Run one real upgrade-from-vN-1 soak on hardware before tagging.**
|
||||||
|
No test installs the previous version, points it at a staged 1.8.0 manifest, applies,
|
||||||
|
and asserts health + rollback. This is the top release risk for an OTA release. A
|
||||||
|
two-VM (or two-node) harness is enough.
|
||||||
|
- [ ] 🟡 **Guard the frontend-build-no-op in the *actual* release path.** The
|
||||||
|
`ui-dist-version` grep guard (`tests/release/run.sh:82`) is behind `--with-build`, which
|
||||||
|
`scripts/create-release.sh:90` never passes → a stale frontend can ship with a valid
|
||||||
|
sha256. Call `run.sh --with-build --manifest` from create-release (or fold the grep in).
|
||||||
|
- [ ] 🟢 **publish-release-assets verifies size, not sha256** (`publish-release-assets.sh:97`).
|
||||||
|
Add a HEAD/GET sha256 compare so a size-correct/content-wrong mirror asset fails the
|
||||||
|
publish gate.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## §C — Backend robustness (🟠 stability, mostly low-effort/high-ROI)
|
||||||
|
|
||||||
|
Note: the `.unwrap()`/`panic!` worry is a **non-issue** — nearly all are in test
|
||||||
|
modules; production request/boot paths are essentially panic-free. The real risks:
|
||||||
|
|
||||||
|
- [ ] 🟠 **Log swallowed persistence writes.** ~30-40 dangerous `let _ = save_*().await`
|
||||||
|
sites discard durability failures with zero diagnostics: `server.rs:270` (mesh config),
|
||||||
|
`bitcoin_relay.rs:865` (relay state), `update.rs:163/1223` (mirrors/update state),
|
||||||
|
`registry.rs:158`, `mesh/status.rs:286`, `scheduler.rs:179`, `install.rs:34`. Convert to
|
||||||
|
`if let Err(e) = … { warn!(…) }`; leave genuinely fire-and-forget ones commented.
|
||||||
|
- [ ] 🟠 **Remove blocking `std::process::Command` from async handlers.**
|
||||||
|
`install.rs:2222` `published_host_port` (sync podman on the install path),
|
||||||
|
`dependencies.rs:316` (`df`), `system/handlers.rs:578` (`sudo`), `transport/fips.rs:50`
|
||||||
|
(`systemctl`) stall tokio workers under load. Convert to `tokio::process` or
|
||||||
|
`spawn_blocking`. Only 8 files use `std::process::Command` — bounded.
|
||||||
|
- [ ] 🟡 **Restrict Bitcoin RPC exposure.** `bootstrap.rs:409` writes
|
||||||
|
`rpcallowip=0.0.0.0/0`. Scope to the container subnet / `127.0.0.1`.
|
||||||
|
- [ ] 🟡 **Move generated secrets from env to file mounts.** `manifest.rs:1208-1226`
|
||||||
|
injects secrets as `-e KEY=value`, readable via `podman inspect` / `/proc/<pid>/environ`.
|
||||||
|
Prefer bind-mounting the existing `0600` secret file or `podman --secret`.
|
||||||
|
- [ ] 🟡 **Harden rate-limit IP extraction.** `middleware.rs:120-128` trusts
|
||||||
|
client-spoofable `X-Real-IP`/`X-Forwarded-For` → per-request bucket rotation defeats the
|
||||||
|
login limiter. Trust forwarded headers only from a configured proxy; have nginx set them.
|
||||||
|
- [ ] 🟢 **Include `seq` in the mesh signed preimage.** `message_types.rs:245-288` signs
|
||||||
|
`(t,v,ts)` but sets the anti-replay `seq` after signing → a radio MITM can alter ordering
|
||||||
|
without breaking the signature.
|
||||||
|
- [ ] 🟢 **Guard the short-DID slice panic** (`mesh/listener/decode.rs:566`) and gate the
|
||||||
|
dev-mode `password123` bypass (`auth.rs:18`) behind `#[cfg]` before it can reach a
|
||||||
|
release build.
|
||||||
|
- [ ] 🟢 **Apply the seccomp/apparmor profile** — `security/src/container_policies.rs:71` is a
|
||||||
|
TODO; the profile is defined but never applied to podman.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## §D — Frontend security & performance (🟠)
|
||||||
|
|
||||||
|
The untrusted mesh/LoRa chat path is **safe** (interpolation, no `v-html` — good).
|
||||||
|
The real issues are the app-bridge origin model and a bloated bundle.
|
||||||
|
|
||||||
|
- [ ] 🟠 **Validate `event.origin` + add consent gates in the NIP-07 nostr bridge.**
|
||||||
|
`stores/appLauncher.ts:385-490` derives the caller from the launcher's own URL, never
|
||||||
|
`event.origin`, and `getPublicKey`/`nip04.decrypt`/`nip44.decrypt` have no consent gate →
|
||||||
|
any co-resident iframe can deanonymize the nostr identity or use the node as a decryption
|
||||||
|
oracle while an app is open. Check `event.origin` against the open app's real origin; key
|
||||||
|
approvals on it; gate decrypt/getPublicKey like `signEvent`.
|
||||||
|
- [ ] 🟠 **Origin-check the `share-to-mesh` handler.** `App.vue:450-464` acts on
|
||||||
|
`{type:'share-to-mesh', cid}` from any sender and force-navigates to `/mesh` with the CID
|
||||||
|
pre-staged. Add `ev.origin === window.location.origin` (as `Chat.vue:95` already does).
|
||||||
|
- [ ] 🟡 **Decide the app-iframe isolation model.** `AppSessionFrame.vue:54` /
|
||||||
|
`AppLauncherOverlay.vue:79` embed apps same-origin with no meaningful `sandbox`; a
|
||||||
|
same-origin app can read the CSRF cookie + `localStorage`. Ideal fix (serve apps from a
|
||||||
|
per-app subdomain origin) is architectural — at minimum decide + document for 1.8.0.
|
||||||
|
- [ ] 🟡 **Shrink the 93 MB dist.** `assets/video/video-intro.mp4` is **14.7 MB**
|
||||||
|
(precached by the service worker → blocks PWA install), plus ~18 MB of ~1 MB full-screen
|
||||||
|
JPEGs. Convert backgrounds to WebP/AVIF at responsive sizes, lazy/stream the intro video,
|
||||||
|
and exclude video/audio from the Workbox precache. Biggest, easiest perf win.
|
||||||
|
- [ ] 🟢 **DOMPurify the `Server.vue` QR SVG** (`:283/:295` render `v-html` unsanitized while
|
||||||
|
`TwoFactorSection.vue` sanitizes the analogous SVG); guard the unguarded `pollInterval`
|
||||||
|
(`Mesh.vue:391`); surface silent data-fetch failures (`curatedApps.ts:58/71`).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## §E — Mesh transports (🟢 mostly done — verify & polish)
|
||||||
|
|
||||||
|
Confirmed **fixed in HEAD:** B8 (1970 timestamps), B6 (inbound RX surfacing), the
|
||||||
|
per-message transport pill, and the archy↔archy plain-TEXT-DM E2E fix. Remaining:
|
||||||
|
|
||||||
|
- [ ] 🟠 **Active Reticulum daemon-death detection.** `reticulum.rs:589` only `warn!`s on
|
||||||
|
socket EOF and `try_recv_frame` then returns `Ok(None)` forever; nothing calls
|
||||||
|
`child.try_wait()`. On an idle link a crashed daemon is invisible for up to 30 min (the
|
||||||
|
RX-stall timeout). Treat socket EOF as `Err` → immediate respawn. (Pairs with the current
|
||||||
|
`fix/reticulum-daemon-pdeathsig` branch work.)
|
||||||
|
- [ ] 🟡 **Persist chat history across restarts.** `state.messages` boots empty
|
||||||
|
(`listener/mod.rs:283`) while outbox/scheduler/peers survive — inconsistent; bubbles
|
||||||
|
vanish on restart. Add `mesh-messages.json` mirroring the `scheduler.rs`/`outbox.rs`
|
||||||
|
pattern (or explicitly accept the loss).
|
||||||
|
- [ ] 🟡 **Tighten the 30 s legacy dedup** (`listener/mod.rs:383-389`) — it silently drops a
|
||||||
|
peer legitimately sending identical text twice within 30 s.
|
||||||
|
- [ ] 🟢 **Wire the PyInstaller daemon binary into the release tarball / deploy script**
|
||||||
|
(Rust expects `/usr/local/bin/archy-reticulum-daemon`, `reticulum.rs:80`); add the RNode
|
||||||
|
udev rule; finish `ARCHY:2:` announce→`arch_pubkey_hex` binding (`reticulum.rs:119`).
|
||||||
|
- [ ] 🟢 **Duty-cycle guard for LoRa TX** — none exists; EU 868 is legally 1%. At minimum an
|
||||||
|
airtime budget/warning.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## §F — ISO / image build (🔴 one secret leak; otherwise 🟠 hardening)
|
||||||
|
|
||||||
|
`image-recipe/_archived/build-auto-installer-iso.sh` (3604 lines) is the real
|
||||||
|
builder; OTA is the normal update path but the ISO is what produces installable
|
||||||
|
media (latest artifact only one minor behind).
|
||||||
|
|
||||||
|
- [ ] ⛔🔴 **Anthropic API key — INTENTIONAL for alpha/beta, hard GO-LIVE gate.**
|
||||||
|
`build-auto-installer-iso.sh:2645` bakes a live `sk-ant-…` key into `claude-api-proxy.service`
|
||||||
|
so alpha/beta testers get frictionless AI (deliberate — per user 2026-07-02). **Do NOT
|
||||||
|
remove for alpha/beta.** Before public GA it MUST be removed + rotated + injected at runtime
|
||||||
|
(a second copy also exists in a worktree). Track it here so it can't be forgotten at launch.
|
||||||
|
- [ ] 🔴 **Per-device secrets on first boot.** The self-signed TLS **private key is generated
|
||||||
|
at build time** (`:426`) → every device ships the same key; SSH host keys likewise not
|
||||||
|
regenerated. Generate TLS + SSH host keys on first boot.
|
||||||
|
- [ ] 🟠 **Kill default credentials.** `archipelago`/`archipelago` (SSH+root), web `password123`,
|
||||||
|
and SSH `PasswordAuthentication yes` (`:411`) all ship. Lock root, force credential
|
||||||
|
creation in onboarding, disable SSH password auth (or force-change on first login).
|
||||||
|
- [ ] 🟠 **Sign + checksum the ISO.** Pipeline ends at `xorriso` with no `SHA256SUMS`, no
|
||||||
|
GPG/minisign, no Secure Boot (`BOOTX64.EFI` is unsigned though `grub-efi-amd64-signed` is
|
||||||
|
installed). Emit + sign checksums; wire signed Secure Boot.
|
||||||
|
- [ ] 🟠 **Registries over HTTPS in the image too** — `146.59.87.168:3000` / `git.tx1138.com`
|
||||||
|
are baked `insecure=true`/`tls_verify:false` (`:216`, `:2308`). (Ties to §A.)
|
||||||
|
- [ ] 🟡 **Add `unattended-upgrades` + a default-deny nftables firewall** (allow 22/80/443 +
|
||||||
|
mesh/WG). Neither exists today; OS packages drift until reflash and there is no host
|
||||||
|
firewall.
|
||||||
|
- [ ] 🟡 **Pin the build for reproducibility.** FIPS daemon is built from unpinned upstream
|
||||||
|
`main`, Tailscale from its live apt repo, and `scripts/image-versions.sh` uses many
|
||||||
|
`:latest`/`stable` tags (+ `bitcoin-ui:1.7.84-alpha`, 15 behind). Pin to commits/versions;
|
||||||
|
snapshot apt. Wire ISO version to `Cargo.toml` so it can't drift.
|
||||||
|
- [ ] 🟢 **Harden LUKS + roadmap A/B partitioning.** The LUKS data key sits in plaintext on the
|
||||||
|
unencrypted root (`:2137`); add TPM2/passphrase binding. Longer-term: A/B (or
|
||||||
|
factory-reset) partitions for safe OTA rollback, and a real install-time TUI
|
||||||
|
(`docs/INSTALL-SCREENS-DESIGN.md` exists but the installer is headless "press Enter").
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## §G — Refactor & code health (🟢 not release-blocking; do after the tag or opportunistically)
|
||||||
|
|
||||||
|
- [ ] 🟢 **Manifest-drive per-app special-casing.** App names are branched on across 5-7 Rust
|
||||||
|
files (`config.rs` 36 match arms, `runtime.rs` 17, `install.rs:275-287` dispatch,
|
||||||
|
`prod_orchestrator.rs:54-83` baseline/restart-sensitive lists). Move `baseline`,
|
||||||
|
`restart_sensitive`, `stack_members`, `multi_container` into the manifest schema; collapse
|
||||||
|
the five near-identical `install_*_stack()` wrappers into one generic call. **Biggest
|
||||||
|
maintainability win.**
|
||||||
|
- [ ] 🟢 **Route all podman/systemctl through `podman_client`.** 113 raw `Command::new("podman")`
|
||||||
|
+ 32 `systemctl` calls bypass the existing 952-LOC wrapper → untestable + the blocking-call
|
||||||
|
risk (§C). Consolidating also unlocks unit tests for the thinly-tested `package/` handlers
|
||||||
|
(`stacks.rs` 1 test, `config.rs` 2, `runtime.rs` 3, `install.rs` 7).
|
||||||
|
- [ ] 🟢 **Split the god-modules.** `prod_orchestrator.rs` (5,263 LOC) → `orchestrator/{reconcile,
|
||||||
|
host_ports,ownership,hooks}.rs`; `Mesh.vue` (2,485 LOC / 241 KB chunk) → sub-components.
|
||||||
|
Both are well-tested, so safe.
|
||||||
|
- [ ] 🟢 **Delete dead code.** ~4,100 LOC of orphan StartOS crates (`js-engine`, `models`,
|
||||||
|
`helpers`, `container-init`) not in the workspace or linked; the committed AppleDouble
|
||||||
|
`._*.rs` files; the committed `.venv/`/`build/`/`__pycache__` under the duplicate
|
||||||
|
`reticulum-daemon/` tree; promote `MeshRadioDevice` enum → trait.
|
||||||
|
- [ ] 🟢 **Resolve the Quadlet flag & dep hygiene.** Decide `use_quadlet_backends`' fate
|
||||||
|
(flip default + delete the legacy `create_container` branch, or freeze as experimental —
|
||||||
|
don't ship both half-maintained). Consolidate the mixed hyper 0.14/1.x ecosystem; bump
|
||||||
|
stale majors (reqwest, base64, thiserror, tokio-tungstenite).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## §H — Testing gaps that gate confidence (🟠)
|
||||||
|
|
||||||
|
- [ ] 🟠 **Add the OTA upgrade soak** (same as §B item 2) — the highest-value missing test.
|
||||||
|
- [ ] 🟡 **Add a host-reboot survival tier** — every app is `○` (untested) for reboot in
|
||||||
|
`TESTING.md:138`; the gate can't reboot the node it runs on. Run SSH-`reboot`-then-reprobe
|
||||||
|
out-of-band per node.
|
||||||
|
- [ ] 🟡 **Make the release gate run the full Rust suite** (or hard-require a green CI sha).
|
||||||
|
`tests/release/run.sh:101` runs only a 6-module slice because the full 1000-test suite
|
||||||
|
hangs PTYs on the dev box → every test outside that slice is unverified at release time if CI is stale.
|
||||||
|
- [ ] 🟡 **Add `--max-time` to `node_rpc()`** (`tests/multinode/lib/multinode.bash`) — a slow
|
||||||
|
server-side RPC hangs the whole multinode suite with no feedback.
|
||||||
|
- [ ] 🟢 **De-hardcode creds/IPs in tests** (`tests/multinode/smoke.sh:32`,
|
||||||
|
`remote-lifecycle.sh:136`); snapshot/restore node baseline between destructive iterations
|
||||||
|
(teardown currently only clears `/tmp` session files).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## §I — Carried-over open items (from `UNIFIED-TASK-TRACKER.md`, still valid)
|
||||||
|
|
||||||
|
- [~] 🟠 **Multinode gate pass** — 5× destructive gate was launched on node `.5`; bring the
|
||||||
|
rest of the fleet to precondition, then run the existing (undocumented-but-present)
|
||||||
|
`tests/multinode/{smoke,meshtastic}.sh` cross-node suites.
|
||||||
|
- [ ] 🟠 **Federation `remove-node` tombstone regression.**
|
||||||
|
`federation/storage.rs:187` does `let _ = tombstone_did(...)` — swallows the write error,
|
||||||
|
so a removed peer reappears after the next sync. (This is a specific, confirmed instance
|
||||||
|
of the §C swallowed-writes class.) Needs a careful fix + `smoke.sh` re-verify.
|
||||||
|
- [ ] 🟠 **Phase-3 Quadlet default-flip** — validated + opt-in on .228/.198; flip
|
||||||
|
`config.rs:256` once the .5 gate reports clean.
|
||||||
|
- [ ] 🟠 **Developer CLI suite** (`archy app validate/render/install/test`) — gates external
|
||||||
|
app publishing (`APP-PACKAGING-MIGRATION-PLAN.md` step 5).
|
||||||
|
- [ ] ⛔🟡 **Version-naming decision** (`1.7.99-alpha` → `1.8.0` vs `1.8.00-alpha`) — a one-line
|
||||||
|
call, then a mechanical bump + tag. **Needs your decision.**
|
||||||
|
- [ ] ⛔🟢 **Bitcoin multi-version fleet OTA** — `.228` working on branch; rollout timing is
|
||||||
|
held for your call (`docs/bitcoin-version-bulletproof-rollout.md`).
|
||||||
|
- [ ] ⛔🟢 **3ccc stock-Meshtastic RF validation** — code fix in place; needs a live radio send.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Suggested order of attack
|
||||||
|
|
||||||
|
1. **The critical path:** §A signing ceremony → then turn on manifest/catalog/image
|
||||||
|
signature enforcement (§A) + OTA HTTPS/signature + deeper health check (§B).
|
||||||
|
2. **Cheap high-ROI stability:** §C swallowed-writes + blocking-calls; §D nostr-bridge
|
||||||
|
+ share-to-mesh origin checks; §H OTA soak + reboot tier.
|
||||||
|
3. **Image hardening:** rest of §F (per-device secrets, default creds, ISO signing,
|
||||||
|
firewall/unattended-upgrades, pinning).
|
||||||
|
4. **Polish, post-tag:** §G refactors, §E mesh persistence/dedup, §D bundle shrink.
|
||||||
|
5. **Decisions you own (⛔):** version name, signing mnemonic, bitcoin OTA timing, 3ccc test.
|
||||||
|
6. **Before public GA only (NOT alpha/beta):** remove + rotate the Anthropic key (§F) —
|
||||||
|
intentionally left in for frictionless AI during alpha/beta.
|
||||||
|
|
||||||
|
*Last updated: 2026-07-02 (initial deep-audit synthesis). Update this line + tick
|
||||||
|
boxes with commit shas as items land.*
|
||||||
File diff suppressed because it is too large
Load Diff
43
scripts/sign-catalog.sh
Executable file
43
scripts/sign-catalog.sh
Executable file
@ -0,0 +1,43 @@
|
|||||||
|
#!/usr/bin/env bash
# One-step release-catalog signer.
#
# Run:   bash scripts/sign-catalog.sh
# Then:  paste your 24-word release master mnemonic, press Enter, then Ctrl-D.
#
# It signs releases/app-catalog.json in place and checks the signature was made
# by the expected release-root key. Your mnemonic is read from the terminal only
# (never stored, never in shell history, never passed to Claude).
#
# Optional environment overrides:
#   ARCHY_REPO      repository root (default: the checkout that contains this script)
#   ARCHY_SIGN_BIN  prebuilt signer binary (default: /tmp/archy-sign-bin/release/archipelago)
#
# Exit status:
#   0  catalog is signed by the expected release-root key
#   1  catalog file missing, or the post-sign check failed
#   2  signer binary not built yet (nothing was changed)
set -euo pipefail

# Resolve the repo from this script's own location (it lives in <repo>/scripts/)
# instead of a machine-specific absolute path, so it works from any checkout.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO="${ARCHY_REPO:-$(dirname "$SCRIPT_DIR")}"
CATALOG="$REPO/releases/app-catalog.json"
EXPECTED_DID="did:key:z6MkkidEnEpo6qHMCNSZoNKWtvQvxq3whnaME9wGgEFhq7ur"

# Use ONLY the prebuilt signer. If it isn't ready, stop — never compile
# here (compiling caused the earlier hangs). Claude builds it in the background.
BIN="${ARCHY_SIGN_BIN:-/tmp/archy-sign-bin/release/archipelago}"
if [[ ! -x "$BIN" ]]; then
  echo "⏳ The signer isn't ready yet — Claude is still building it."
  echo " Wait until Claude says 'READY', then run this again. Nothing was changed."
  # Non-zero so `sign-catalog.sh && <commit>` can never proceed without a signature.
  exit 2
fi

# Fail before asking for the mnemonic if there is nothing to sign.
if [[ ! -f "$CATALOG" ]]; then
  echo "❌ Catalog not found: $CATALOG" >&2
  exit 1
fi

SIGN=("$BIN" ceremony sign "$CATALOG")

echo "════════════════════════════════════════════════════════════════"
echo " Paste your 24-word release master mnemonic below, press Enter,"
echo " then press Ctrl-D on a new line."
echo "════════════════════════════════════════════════════════════════"
# `set -e` aborts here if the signer itself exits non-zero.
"${SIGN[@]}"

# Verify the signature is present and made by the expected key. The patterns
# tolerate any whitespace around the colon so the check does not depend on how
# the signer pretty-prints the JSON. The leading quote in '"signature"' keeps
# per-app "image_signature" fields from satisfying the check.
echo
if grep -Eq "\"signed_by\"[[:space:]]*:[[:space:]]*\"${EXPECTED_DID}\"" "$CATALOG" \
   && grep -Eq '"signature"[[:space:]]*:' "$CATALOG"; then
  echo "✅ SUCCESS — catalog signed by the correct release-root key."
  echo " Tell Claude \"signed\" and it will commit + push for you."
else
  echo "❌ Something is off — the catalog is NOT signed by the expected key."
  echo " Expected signer: $EXPECTED_DID"
  echo " Do NOT commit. Check the mnemonic and re-run, or ask Claude."
  exit 1
fi
|
||||||
Loading…
x
Reference in New Issue
Block a user