fix(fips,iso): bulletproof FIPS from install — no Activate button needed
Problems addressed (all observed on .198):
* fips_key was written as raw 32 bytes; upstream fips daemon reads it
with read_to_string() and bailed with "stream did not contain valid
UTF-8", crashlooping indefinitely.
* The Activate button was racy: the user had to press it manually, and it
kept failing silently because the daemon couldn't parse its own config.
* The fix for FIPS schema drift (already landed in 7d8a5864) lives in the
config write path, which was only reachable through the same broken
"Activate" flow, so that fix alone didn't help existing nodes.
* Journal was on tmpfs — every reboot wiped install/onboarding history,
making post-hoc debugging impossible.
Changes:
* identity.rs: write fips_key as bech32 nsec + newline. load_fips_keys
now auto-migrates legacy 32-byte files to bech32 the first time it
reads them, so OTA updates from v1.5.0-alpha self-heal without user
action.
* server.rs: post-onboarding auto-activate task runs on every
archipelago startup. If fips_key exists it ensures /etc/fips/fips.yaml
is schema-current and starts archipelago-fips.service. Pre-onboarding
nodes stay quiet (guarded on fips_key_exists).
* ISO build: un-mask archipelago-fips, archipelago-wg and
archipelago-wg-address — all use ConditionPathExists on their key
files, so systemd silently skips them pre-onboarding (no MOTD
[FAILED]). Only nostr-vpn stays masked (legacy service, superseded by
upstream fips).
* Journald made persistent via /var/log/journal + 500M cap, so
install and first-boot logs survive reboots for diagnosis.
After this, a fresh install + onboarding should bring FIPS up automatically
with no user interaction. The UI "Activate" button can stay as an escape
hatch (the RPC is still there) but is no longer on the critical path.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
00a86e6ecf
commit
d9411c3325
@ -200,7 +200,16 @@ async fn write_fips_key_from_seed(
|
|||||||
let key_path = identity_dir.join(FIPS_KEY_FILE);
|
let key_path = identity_dir.join(FIPS_KEY_FILE);
|
||||||
let pub_path = identity_dir.join(FIPS_KEY_PUB_FILE);
|
let pub_path = identity_dir.join(FIPS_KEY_PUB_FILE);
|
||||||
|
|
||||||
fs::write(&key_path, keys.secret_key().to_secret_bytes())
|
// fips daemon reads the key with `fs::read_to_string` and expects a
|
||||||
|
// bech32 nsec line — raw 32-byte secret bytes fail its UTF-8 check
|
||||||
|
// ("failed to read config file /etc/fips/fips.key: stream did not
|
||||||
|
// contain valid UTF-8"). Write the bech32 form with a trailing
|
||||||
|
// newline so both archipelago and fips load it cleanly.
|
||||||
|
let nsec = keys
|
||||||
|
.secret_key()
|
||||||
|
.to_bech32()
|
||||||
|
.context("Failed to encode FIPS nsec")?;
|
||||||
|
fs::write(&key_path, format!("{nsec}\n"))
|
||||||
.await
|
.await
|
||||||
.context("Failed to write FIPS key")?;
|
.context("Failed to write FIPS key")?;
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
@ -210,11 +219,11 @@ async fn write_fips_key_from_seed(
|
|||||||
.await
|
.await
|
||||||
.context("Failed to set FIPS key permissions")?;
|
.context("Failed to set FIPS key permissions")?;
|
||||||
}
|
}
|
||||||
fs::write(&pub_path, keys.public_key().to_bytes())
|
let npub = keys.public_key().to_bech32().unwrap_or_default();
|
||||||
|
fs::write(&pub_path, format!("{npub}\n"))
|
||||||
.await
|
.await
|
||||||
.context("Failed to write FIPS public key")?;
|
.context("Failed to write FIPS public key")?;
|
||||||
|
|
||||||
let npub = keys.public_key().to_bech32().unwrap_or_default();
|
|
||||||
tracing::info!(
|
tracing::info!(
|
||||||
"Derived FIPS mesh key from seed (npub: {}...)",
|
"Derived FIPS mesh key from seed (npub: {}...)",
|
||||||
npub.chars().take(20).collect::<String>()
|
npub.chars().take(20).collect::<String>()
|
||||||
@ -235,15 +244,50 @@ pub fn fips_key_exists(identity_dir: &Path) -> bool {
|
|||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub async fn load_fips_keys(identity_dir: &Path) -> Result<Option<nostr_sdk::Keys>> {
|
pub async fn load_fips_keys(identity_dir: &Path) -> Result<Option<nostr_sdk::Keys>> {
|
||||||
let key_path = identity_dir.join(FIPS_KEY_FILE);
|
let key_path = identity_dir.join(FIPS_KEY_FILE);
|
||||||
match fs::read(&key_path).await {
|
// Read as raw bytes so we can detect and migrate both formats:
|
||||||
Ok(bytes) => {
|
// - v1.6+: bech32 nsec text (what upstream fips expects)
|
||||||
let secret = nostr_sdk::SecretKey::from_slice(&bytes)
|
// - <=v1.5: raw 32-byte secret (incompatible with upstream fips)
|
||||||
.map_err(|e| anyhow::anyhow!("Corrupt FIPS key on disk: {}", e))?;
|
// When we find the legacy format, rewrite the file in bech32 in place
|
||||||
Ok(Some(nostr_sdk::Keys::new(secret)))
|
// so archipelago-fips.service stops crashlooping after an OTA update
|
||||||
|
// from a release that shipped the old format.
|
||||||
|
let bytes = match fs::read(&key_path).await {
|
||||||
|
Ok(b) => b,
|
||||||
|
Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
|
||||||
|
Err(e) => return Err(e).context("Failed to read FIPS key"),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Try bech32 first.
|
||||||
|
if let Ok(text) = std::str::from_utf8(&bytes) {
|
||||||
|
if let Ok(secret) = nostr_sdk::SecretKey::parse(text.trim()) {
|
||||||
|
return Ok(Some(nostr_sdk::Keys::new(secret)));
|
||||||
}
|
}
|
||||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(None),
|
|
||||||
Err(e) => Err(e).context("Failed to read FIPS key"),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fall through: treat as legacy raw bytes and migrate.
|
||||||
|
if bytes.len() == 32 {
|
||||||
|
let secret = nostr_sdk::SecretKey::from_slice(&bytes)
|
||||||
|
.map_err(|e| anyhow::anyhow!("Corrupt FIPS key on disk: {}", e))?;
|
||||||
|
let nsec = secret
|
||||||
|
.to_bech32()
|
||||||
|
.map_err(|e| anyhow::anyhow!("Failed to encode migrated nsec: {}", e))?;
|
||||||
|
fs::write(&key_path, format!("{nsec}\n"))
|
||||||
|
.await
|
||||||
|
.context("Failed to rewrite FIPS key in bech32 format")?;
|
||||||
|
#[cfg(unix)]
|
||||||
|
{
|
||||||
|
use std::os::unix::fs::PermissionsExt;
|
||||||
|
fs::set_permissions(&key_path, std::fs::Permissions::from_mode(0o600))
|
||||||
|
.await
|
||||||
|
.context("Failed to re-set FIPS key permissions after migration")?;
|
||||||
|
}
|
||||||
|
tracing::info!("Migrated legacy raw-bytes FIPS key to bech32 nsec text");
|
||||||
|
return Ok(Some(nostr_sdk::Keys::new(secret)));
|
||||||
|
}
|
||||||
|
|
||||||
|
anyhow::bail!(
|
||||||
|
"Corrupt FIPS key on disk (not bech32 nsec and not 32 raw bytes, size={})",
|
||||||
|
bytes.len()
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the FIPS npub (bech32) if the key has been materialised.
|
/// Return the FIPS npub (bech32) if the key has been materialised.
|
||||||
|
|||||||
@ -459,6 +459,44 @@ impl Server {
|
|||||||
config.data_dir.clone(),
|
config.data_dir.clone(),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Post-onboarding auto-activation for archipelago-fips. Runs once
|
||||||
|
// at startup: if fips_key is on disk, install /etc/fips/fips.yaml
|
||||||
|
// (schema-refreshed) and start the service. This removes the
|
||||||
|
// need for a user-facing "Activate" button — the node comes up
|
||||||
|
// with FIPS running whenever the seed has been onboarded. Also
|
||||||
|
// self-heals legacy raw-byte fips.key files (load_fips_keys
|
||||||
|
// rewrites them as bech32 nsec the first time they're read).
|
||||||
|
// Pre-onboarding nodes: ConditionPathExists on the service unit
|
||||||
|
// + the `fips_key_exists` guard here keep this quiet.
|
||||||
|
{
|
||||||
|
let data_dir = config.data_dir.clone();
|
||||||
|
tokio::spawn(async move {
|
||||||
|
let identity_dir = data_dir.join("identity");
|
||||||
|
if !crate::identity::fips_key_exists(&identity_dir) {
|
||||||
|
tracing::debug!("FIPS auto-activate skipped: fips_key not on disk");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Trigger the migration path in load_fips_keys so old raw-byte
|
||||||
|
// key files are rewritten as bech32 before fips.yaml install.
|
||||||
|
if let Err(e) = crate::identity::load_fips_keys(&identity_dir).await {
|
||||||
|
tracing::warn!("FIPS key load/migrate failed: {}", e);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if let Err(e) = crate::fips::config::install(&identity_dir).await {
|
||||||
|
tracing::warn!("FIPS config install failed on startup: {}", e);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if let Err(e) = crate::fips::service::activate(crate::fips::SERVICE_UNIT).await {
|
||||||
|
tracing::warn!(
|
||||||
|
"archipelago-fips activate failed on startup: {} — user can retry via fips.install RPC",
|
||||||
|
e
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
tracing::info!("archipelago-fips auto-activated on startup");
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
_config: config,
|
_config: config,
|
||||||
_identity: identity,
|
_identity: identity,
|
||||||
|
|||||||
@ -453,30 +453,24 @@ RUN systemctl enable NetworkManager || true && \
|
|||||||
systemctl enable archipelago-reconcile.timer || true && \
|
systemctl enable archipelago-reconcile.timer || true && \
|
||||||
systemctl enable archipelago-tor-helper.path || true && \
|
systemctl enable archipelago-tor-helper.path || true && \
|
||||||
systemctl enable nostr-relay || true
|
systemctl enable nostr-relay || true
|
||||||
# archipelago-wg + wg-address: enabled by first-boot after WG key is generated
|
# archipelago-fips.service + archipelago-wg.service + archipelago-wg-address.service
|
||||||
# nostr-vpn: enabled by first-boot after Nostr identity is generated
|
# stay installed and enabled. They all use `ConditionPathExists=` on their
|
||||||
# (env file doesn't exist until onboarding, so pre-enabling causes crash-loop)
|
# respective seed-derived key files, so on a fresh pre-onboarding boot
|
||||||
# archipelago-fips: masked by default; archipelago backend unmasks +
|
# systemd quietly skips them with no [FAILED] in the MOTD. Once the user
|
||||||
# starts it via `fips.install` RPC once the seed-derived fips_key is on
|
# completes the seed onboarding flow, archipelago writes the key files,
|
||||||
# disk and the fips daemon package is installed. Pre-onboarding the node
|
# the archipelago backend calls `systemctl start archipelago-fips.service`
|
||||||
# stays dark on FIPS so no traffic leaves an ephemeral identity.
|
# (see server.rs post-onboarding auto-activate block) and the WG setup
|
||||||
RUN systemctl mask archipelago-fips.service || true
|
# path runs `archipelago-wg setup` directly. No masking, no user-facing
|
||||||
|
# "Activate" button — install → onboard → FIPS + WG are just running.
|
||||||
|
RUN systemctl enable archipelago-fips.service || true
|
||||||
|
|
||||||
# Same rationale for nostr-vpn and wireguard helpers — their env files
|
# nostr-vpn is the legacy nostr-tunnel service — deprecated in favour of
|
||||||
# don't exist until onboarding completes, so leaving these "enabled"
|
# the upstream FIPS daemon. It still crash-loops on boot if left enabled
|
||||||
# (the default from WantedBy=multi-user.target) produces a red
|
# (env file doesn't exist until onboarding) so we mask it outright.
|
||||||
# [FAILED] in the boot MOTD every reboot. Mask by replacing each
|
# `systemctl mask` alone doesn't stick because the real .service file is
|
||||||
# .service with a /dev/null symlink — plain `systemctl mask` refuses
|
# already in place — explicit rm + /dev/null symlink is what sticks.
|
||||||
# to clobber the real files we just COPY'd in, so the previous
|
RUN rm -f /etc/systemd/system/nostr-vpn.service && \\
|
||||||
# attempt left the services installable via dependency chains
|
ln -sf /dev/null /etc/systemd/system/nostr-vpn.service
|
||||||
# (nostr-relay has Before=nostr-vpn, which pulls it in). Explicit
|
|
||||||
# rm + ln -sf creates the proper masked state. The onboarding flow
|
|
||||||
# removes the symlink and drops in a configured service when env
|
|
||||||
# files are in place.
|
|
||||||
RUN for svc in nostr-vpn archipelago-wg archipelago-wg-address; do \\
|
|
||||||
rm -f /etc/systemd/system/\$svc.service; \\
|
|
||||||
ln -sf /dev/null /etc/systemd/system/\$svc.service; \\
|
|
||||||
done
|
|
||||||
|
|
||||||
# Remove policy-rc.d so services can start on first boot
|
# Remove policy-rc.d so services can start on first boot
|
||||||
RUN rm -f /usr/sbin/policy-rc.d
|
RUN rm -f /usr/sbin/policy-rc.d
|
||||||
@ -489,6 +483,15 @@ RUN mkdir -p /var/lib/archipelago/data /var/lib/archipelago/config /var/lib/arch
|
|||||||
cp /etc/archipelago/nostr-relay-config.toml /var/lib/archipelago/nostr-relay/config.toml && \
|
cp /etc/archipelago/nostr-relay-config.toml /var/lib/archipelago/nostr-relay/config.toml && \
|
||||||
chown -R archipelago:archipelago /var/lib/archipelago /opt/archipelago
|
chown -R archipelago:archipelago /var/lib/archipelago /opt/archipelago
|
||||||
|
|
||||||
|
# Persist journalctl across reboots — without /var/log/journal systemd
|
||||||
|
# journal uses tmpfs and everything before the last boot is lost. We
|
||||||
|
# need the full history to diagnose first-boot / install / onboarding
|
||||||
|
# issues after the fact. Size cap keeps it from eating the disk.
|
||||||
|
RUN mkdir -p /var/log/journal && \
|
||||||
|
systemd-tmpfiles --create --prefix /var/log/journal 2>/dev/null || true && \
|
||||||
|
install -d -m 0755 /etc/systemd/journald.conf.d && \
|
||||||
|
printf '[Journal]\nStorage=persistent\nSystemMaxUse=500M\nRuntimeMaxUse=100M\nForwardToSyslog=no\n' > /etc/systemd/journald.conf.d/10-archipelago-persistent.conf
|
||||||
|
|
||||||
# Clean up
|
# Clean up
|
||||||
RUN apt-get clean && \
|
RUN apt-get clean && \
|
||||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user