Compare commits
2 Commits
7d89b4d8b2
...
d1cd42c821
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d1cd42c821 | ||
|
|
3e3016f2bd |
@ -294,6 +294,20 @@ async fn chown_for_rootless_container(uid_gid: &str, path: &str) -> Result<()> {
|
||||
))
|
||||
}
|
||||
|
||||
/// Process-wide skip list of `(container-id, mount-dest)` pairs for which an
/// in-container chown already failed hard (e.g. "Operation not permitted" on a
/// mount that cannot be re-owned from inside the userns).
///
/// The per-reconcile ownership repair consults this set so that one
/// unrepairable mount (observed: mempool-api `/data`) is not retried on every
/// pass, which would burn CPU and flood the journal. Entries live for the life
/// of the process. The key uses the container Id, so a recreated container is
/// attempted afresh.
///
/// The set is created lazily on the first call; every call returns a reference
/// to the same mutex-guarded instance.
fn unrepairable_ownership() -> &'static std::sync::Mutex<std::collections::HashSet<(String, String)>>
{
    use std::collections::HashSet;
    use std::sync::{Mutex, OnceLock};

    // Alias keeps the static's declaration readable; the type is unchanged.
    type SkipList = Mutex<HashSet<(String, String)>>;

    static REGISTRY: OnceLock<SkipList> = OnceLock::new();

    // First caller builds the empty set; later callers get the same instance.
    REGISTRY.get_or_init(SkipList::default)
}
|
||||
|
||||
/// App-agnostic, userns-mapping-proof volume-ownership repair for a RUNNING
|
||||
/// container.
|
||||
///
|
||||
@ -332,6 +346,13 @@ async fn ensure_running_container_ownership(name: &str) -> bool {
|
||||
.filter(|g| !g.is_empty())
|
||||
.unwrap_or_else(|| uid.clone());
|
||||
|
||||
// Stable identity of THIS container instance — used to remember mounts whose
|
||||
// chown is hard-unrepairable so we stop hammering them every reconcile. Keyed
|
||||
// by Id (not name) so a recreated container gets a fresh repair attempt.
|
||||
let cid = podman_stdout(&["inspect", name, "--format", "{{.Id}}"])
|
||||
.await
|
||||
.unwrap_or_default();
|
||||
|
||||
// Writable bind-mount destinations only.
|
||||
let dests = match podman_stdout(&[
|
||||
"inspect",
|
||||
@ -359,6 +380,19 @@ async fn ensure_running_container_ownership(name: &str) -> bool {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Known hard-unrepairable for this container instance (a previous chown
|
||||
// returned a permanent error like "Operation not permitted"). Skip the
|
||||
// probe+chown entirely — retrying every reconcile only burns CPU and
|
||||
// floods the journal; it will never succeed for this instance.
|
||||
if !cid.is_empty()
|
||||
&& unrepairable_ownership()
|
||||
.lock()
|
||||
.map(|s| s.contains(&(cid.clone(), dest.to_string())))
|
||||
.unwrap_or(false)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Drift check: can the service user write here already?
|
||||
let probe = format!(
|
||||
"t=\"{dest}/.archy-wtest.$$\"; touch \"$t\" 2>/dev/null && rm -f \"$t\" 2>/dev/null"
|
||||
@ -395,11 +429,21 @@ async fn ensure_running_container_ownership(name: &str) -> bool {
|
||||
"repaired unwritable volume ownership (in-container chown)"
|
||||
);
|
||||
}
|
||||
Ok(o) => tracing::warn!(
|
||||
Ok(o) => {
|
||||
// Permanent failure (e.g. "Operation not permitted" on a mount
|
||||
// that simply can't be re-owned from inside the userns). Record
|
||||
// it so we don't re-attempt every reconcile — log once, loudly.
|
||||
if !cid.is_empty() {
|
||||
if let Ok(mut s) = unrepairable_ownership().lock() {
|
||||
s.insert((cid.clone(), dest.to_string()));
|
||||
}
|
||||
}
|
||||
tracing::warn!(
|
||||
container = %name, dest,
|
||||
"volume ownership repair failed: {}",
|
||||
"volume ownership repair failed (won't retry for this container instance): {}",
|
||||
String::from_utf8_lossy(&o.stderr).trim()
|
||||
),
|
||||
)
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(container = %name, dest, "volume ownership repair errored: {e}")
|
||||
}
|
||||
|
||||
@ -1,9 +1,12 @@
|
||||
<template>
|
||||
<Teleport to="body">
|
||||
<!-- Offline Banner -->
|
||||
<!-- Lifecycle / Offline Banner.
|
||||
Server restart/shutdown is deliberate → shown immediately. A plain
|
||||
connection blip is debounced (showConnIssue) so transient sub-grace
|
||||
reconnects don't flash. -->
|
||||
<Transition name="conn-banner">
|
||||
<div
|
||||
v-if="isOffline && !store.isReconnecting && store.isAuthenticated"
|
||||
v-if="(showLifecycle || showConnectionLost)"
|
||||
class="conn-banner-overlay"
|
||||
>
|
||||
<div class="path-option-card px-6 py-3 border-l-4 border-yellow-500 inline-flex items-center gap-2 text-yellow-200 shadow-2xl">
|
||||
@ -17,10 +20,10 @@
|
||||
</div>
|
||||
</Transition>
|
||||
|
||||
<!-- Reconnecting Banner -->
|
||||
<!-- Reconnecting Banner (debounced) -->
|
||||
<Transition name="conn-banner">
|
||||
<div
|
||||
v-if="store.isReconnecting && store.isAuthenticated"
|
||||
v-if="showReconnecting"
|
||||
class="conn-banner-overlay"
|
||||
>
|
||||
<div class="path-option-card px-6 py-3 border-l-4 border-blue-500 inline-flex items-center gap-2 text-blue-200 shadow-2xl">
|
||||
@ -35,7 +38,7 @@
|
||||
</template>
|
||||
|
||||
<script setup lang="ts">
|
||||
import { computed } from 'vue'
|
||||
import { computed, ref, watch, onUnmounted } from 'vue'
|
||||
import { useAppStore } from '@/stores/app'
|
||||
|
||||
const store = useAppStore()
|
||||
@ -43,6 +46,58 @@ const store = useAppStore()
|
||||
const isOffline = computed(() => store.isOffline)
|
||||
const isRestarting = computed(() => store.isRestarting)
|
||||
const isShuttingDown = computed(() => store.isShuttingDown)
|
||||
|
||||
// A deliberate server lifecycle transition (restart/shutdown) is real and
|
||||
// user-initiated — surface it immediately, no debounce.
|
||||
const isLifecycleTransition = computed(() => isRestarting.value || isShuttingDown.value)
|
||||
const showLifecycle = computed(() => isLifecycleTransition.value && store.isAuthenticated)
|
||||
|
||||
// A plain connection blip (offline or reconnecting, not a lifecycle transition).
|
||||
// The overwhelming majority recover within a second or two (load spikes,
|
||||
// Tailscale/relay TCP resets), so showing the banner instantly makes a healthy
|
||||
// node read as unstable. Debounce: only surface after the issue persists past a
|
||||
// grace window; hide immediately on recovery.
|
||||
const hasConnIssue = computed(
|
||||
() => (store.isReconnecting || isOffline.value) && !isLifecycleTransition.value
|
||||
)
|
||||
|
||||
const SHOW_DELAY_MS = 2500
|
||||
const showConnIssue = ref(false)
|
||||
let pendingTimer: ReturnType<typeof setTimeout> | null = null
|
||||
|
||||
// Cancel the grace-window timer if one is currently armed and forget its
// handle. Safe to call when nothing is pending.
function clearTimer() {
  if (!pendingTimer) return
  clearTimeout(pendingTimer)
  pendingTimer = null
}
|
||||
|
||||
// Debounce driver for the connection-issue banners. Runs once on setup
// (immediate) and again whenever `hasConnIssue` changes.
watch(
  hasConnIssue,
  (issue) => {
    // Any change invalidates a previously armed timer.
    clearTimer()

    if (!issue) {
      // No issue (recovered, or never had one): hide at once.
      showConnIssue.value = false
      return
    }

    // Only surface the banner if the issue outlives the grace window.
    pendingTimer = setTimeout(() => {
      showConnIssue.value = true
      pendingTimer = null
    }, SHOW_DELAY_MS)
  },
  { immediate: true }
)
|
||||
|
||||
onUnmounted(clearTimer)
|
||||
|
||||
// Debounced visual states the template renders.
|
||||
const showReconnecting = computed(
|
||||
() => showConnIssue.value && store.isReconnecting && store.isAuthenticated
|
||||
)
|
||||
const showConnectionLost = computed(
|
||||
() => showConnIssue.value && isOffline.value && !store.isReconnecting && store.isAuthenticated
|
||||
)
|
||||
</script>
|
||||
|
||||
<style scoped>
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user