From abff99cbec637c3324301a89eb3fa451d180a5f9 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 13 Feb 2026 10:51:19 -0800 Subject: [PATCH 01/33] Set up local caching --- Cargo.lock | 21 +++- Cargo.toml | 4 + lib/cache/eviction/lru.rs | 210 ++++++++++++++++++++++++++++++++++++++ lib/cache/eviction/mod.rs | 2 + lib/cache/fcache.rs | 106 +++++++++++++++++++ lib/cache/mod.rs | 3 + lib/cache/traits.rs | 86 ++++++++++++++++ lib/io.rs | 17 +++ lib/lib.rs | 9 ++ 9 files changed, 457 insertions(+), 1 deletion(-) create mode 100644 lib/cache/eviction/lru.rs create mode 100644 lib/cache/eviction/mod.rs create mode 100644 lib/cache/fcache.rs create mode 100644 lib/cache/mod.rs create mode 100644 lib/cache/traits.rs create mode 100644 lib/io.rs create mode 100644 lib/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 577f7b3..ebbe571 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -574,6 +574,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "foreign-types" version = "0.3.2" @@ -767,6 +773,7 @@ dependencies = [ "dirs", "fuser", "futures", + "hashlink", "http", "inquire", "libc", @@ -835,7 +842,7 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - "foldhash", + "foldhash 0.1.5", ] [[package]] @@ -843,6 +850,18 @@ name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "foldhash 0.2.0", +] + +[[package]] +name = "hashlink" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea0b22561a9c04a7cb1a302c013e0259cd3b4bb619f145b32f72b8b4bcbed230" +dependencies = [ + "hashbrown 0.16.1", +] [[package]] name = "heck" diff --git a/Cargo.toml b/Cargo.toml index 9256d6e..b6fe250 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,9 @@ license = "MIT" repository = "https://github.com/mesa-dot-dev/git-fs" authors = ["Marko Vejnovic "] +[lib] +path = "lib/lib.rs" + [dependencies] clap = { version = "4.5.54", features = ["derive", "env"] } tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] } @@ -47,6 +50,7 @@ opentelemetry = { version = "0.29" } opentelemetry_sdk = { version = "0.29", features = ["rt-tokio"] } opentelemetry-otlp = { version = "0.29", default-features = false, features = ["http-proto", "trace", "reqwest-blocking-client"] } tracing-opentelemetry = { version = "0.30" } +hashlink = "0.11.0" [features] default = [] diff --git a/lib/cache/eviction/lru.rs b/lib/cache/eviction/lru.rs new file mode 100644 index 0000000..7e21cda --- /dev/null +++ b/lib/cache/eviction/lru.rs @@ -0,0 +1,210 @@ +//! Implements the LRU eviction policy. + +use std::{collections::HashSet, future::Future, hash::Hash, pin::Pin, sync::Arc}; + +use hashlink::LinkedHashSet; +use tokio::{sync::mpsc::Receiver, task::JoinHandle}; + +/// A trait for deleting keys from the cache. This is used by the LRU eviction tracker to delete +/// keys when they are evicted. +pub trait Deleter: Send + 'static { + /// Delete the given keys from the cache. 
The keys are guaranteed to be in the order of + /// eviction. You absolutely MUST delete the keys in the order they are given, otherwise the + /// LRU eviction tracker will get very confused and break. + fn delete<'a>(&mut self, keys: impl Iterator) + where + K: 'a; +} + +/// Messages sent to the LRU eviction tracker worker. +#[derive(Debug, Clone)] +enum Message { + /// Notify the LRU eviction tracker that the given key was accessed. + Accessed(K), + /// Request an eviction set of the given size. + Evict(usize), + /// Notify the LRU eviction tracker that a given key was inserted. + Inserted(K), +} + +#[derive(Debug)] +struct LruProcessingTask> { + receiver: Receiver>, + + /// The ordered set of keys, ordered according to the last-used policy. + ordered_key_set: LinkedHashSet, + + /// Tracks which keys are currently considered evicted. Read the long explanation in + /// service_message for why this is necessary. + evicted_keys: HashSet, + + /// The deleter to call when we need to evict keys. + deleter: D, +} + +impl> LruProcessingTask { + fn new(deleter: D, receiver: Receiver>) -> Self { + Self { + receiver, + ordered_key_set: LinkedHashSet::new(), + evicted_keys: HashSet::new(), + deleter, + } + } + + fn spawn_task(deleter: D, receiver: Receiver>) -> JoinHandle<()> { + // TODO(markovejnovic): This should have a best-effort drop. + tokio::spawn(async move { + let mut task = Self::new(deleter, receiver); + task.work().await; + }) + } + + async fn work(&mut self) { + while let Some(msg) = self.receiver.recv().await + && self.service_message(msg).await + {} + } + + /// Returns true if the task should continue working. + #[must_use] + fn service_message( + &mut self, + message: Message, + ) -> Pin + Send + '_>> { + Box::pin(async move { + match message { + Message::Accessed(k) => { + if self.evicted_keys.contains(&k) { + // This is a ghost access, happens when a client accesses a key that may not + // have been cleaned up yet. Just ignore it. + return true; + } + + self.reposition_existing_key(k); + } + Message::Evict(max_count) => { + // Before we send off the eviction set, we actually need to remove the evicted + // keys from the pending queue. Read the following for an explanation. + // + // The client may access a key AFTER an eviction notice. Until the eviction + // notice is processed by the client, the client will have the key available to + // it. + // + // Consequently, the client may very well access the key after it sent out an + // eviction notice, but before the eviction notice is processed. This will + // result in the key being added as an access message after the eviction + // notice, but before the eviction set is sent to the client. + // + // The problem is, the LruProcessingTask has no way of knowing whether the + // message the client sent is a stale message, or a legit message. + // + // Consider the following queue state in between two loop iterations of the + // LruProcessingTask: + // A -- access + // B -- insert + // E -- eviction request + // C -- the client thread which made the request + // + // A1 A2 A3 E1 A1 B1 -- after this point, no clients can access A1 + // C1 C1 C1 C1 C2 C1 ---------------------------------------------> time + // + // The scenario here is: + // - A1, A2, A3 are used by the clients. + // - C1 wants to add B1, but it doesn't have enough space, so it sends an + // eviction request. + // - In parallel, C2 accesses A1, which is completely legal. 
+ // + // The result is that our queue has A1, even after we sent out the eviction + // request, and we have no way of knowing whether A1 is a stale message or not. + // + // To prevent this whole "is this thing stale or not" problem, we need to + // "erase" keys evicted in the future. Unfortunately, you can't mutate queues, + // so the best we can do is simply mark the stupid keys as "evicted" and ignore + // any accesses from these keys until they're marked as "inserted" again. + // + // We cannot mutate the queue to mark messages as stale, so the best we can do + // is mark the keys as "evicted" and ignore any accesses from these keys until + // they're marked as "inserted" again. + { + let take_count = self.ordered_key_set.len().min(max_count); + let eviction_set_it = self.ordered_key_set.iter().take(take_count); + self.evicted_keys.extend(eviction_set_it.clone()); + self.deleter.delete(eviction_set_it); + for _ in 0..take_count { + if let None = self.ordered_key_set.pop_front() { + break; + } + } + } + } + Message::Inserted(k) => { + debug_assert!(!self.ordered_key_set.contains(&k)); + + // If the key has been evicted, but is now inserted, that means the key is no + // longer stale. + if self.evicted_keys.contains(&k) { + self.evicted_keys.remove(&k); + } + + self.ordered_key_set.insert(k); + } + } + + true + }) + } + + fn reposition_existing_key(&mut self, key: K) { + debug_assert!(self.ordered_key_set.contains(&key)); + + self.ordered_key_set.remove(&key); + self.ordered_key_set.insert(key); + } +} + +/// An LRU eviction tracker. This is used to track the least recently used keys in the cache, and +/// to evict keys when necessary. +#[derive(Debug, Clone)] +pub struct LruEvictionTracker { + worker_message_sender: tokio::sync::mpsc::Sender>, + _worker: Arc>, +} + +impl LruEvictionTracker { + /// Spawn a new LRU eviction tracker with the given deleter and channel size. + pub fn spawn>(deleter: D, channel_size: usize) -> Self { + let (tx, rx) = tokio::sync::mpsc::channel(channel_size); + + Self { + worker_message_sender: tx, + _worker: Arc::new(LruProcessingTask::spawn_task(deleter, rx)), + } + } + + /// Notify the LRU eviction tracker that the given key was inserted. + /// + /// You MUST call this method for every new key that is inserted into the cache. + pub async fn insert(&mut self, key: K) { + self.send_msg(Message::Inserted(key)).await; + } + + /// Notify the LRU eviction tracker that the given key was accessed. + /// + /// You MUST call this method for every read or update to a key. + pub async fn access(&mut self, key: K) { + self.send_msg(Message::Accessed(key)).await; + } + + /// Cull the least recently used keys with the given deletion method. + pub async fn cull(&mut self, max_count: usize) { + self.send_msg(Message::Evict(max_count)).await; + } + + /// Send a message to the worker. This is a helper method to reduce code duplication. + async fn send_msg(&mut self, message: Message) { + if let Err(_) = self.worker_message_sender.send(message.clone()).await { + unreachable!(); + } + } +} diff --git a/lib/cache/eviction/mod.rs b/lib/cache/eviction/mod.rs new file mode 100644 index 0000000..94ca450 --- /dev/null +++ b/lib/cache/eviction/mod.rs @@ -0,0 +1,2 @@ +/// Different types of eviction policies for any cache. +pub mod lru; diff --git a/lib/cache/fcache.rs b/lib/cache/fcache.rs new file mode 100644 index 0000000..388d591 --- /dev/null +++ b/lib/cache/fcache.rs @@ -0,0 +1,106 @@ +//! File-based cache. 
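+//!
+//! A minimal usage sketch, assuming the crate is imported as `gitfs` and picking
+//! `u64` as an illustrative key type; neither name is fixed by this module:
+//!
+//! ```ignore
+//! // `gitfs` and the cache directory below are assumptions for this sketch.
+//! use gitfs::cache::{fcache::FileCache, traits::AsyncReadableCache as _};
+//! use std::path::Path;
+//!
+//! async fn demo() -> Result<(), Box<dyn std::error::Error>> {
+//!     let cache: FileCache<u64> = FileCache::new(Path::new("/tmp/gitfs-cache")).await?;
+//!     // Nothing has been inserted yet, so every lookup misses.
+//!     assert!(cache.get(&42).await.is_none());
+//!     assert!(!cache.contains(&42).await);
+//!     Ok(())
+//! }
+//! ```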
+ +use std::{ + hash::Hash, + path::{Path, PathBuf}, + sync::atomic::AtomicUsize, +}; + +use crate::{cache::traits::AsyncReadableCache, io}; +use thiserror::Error; + +use tokio::io::AsyncReadExt; + +#[derive(Debug, Error)] +pub enum InvalidRootPathError { + #[error("Root path is not a directory: {0}")] + NotADirectory(PathBuf), + + #[error("Root path appears to contain data stemming from sources different to this app: {0}")] + RootPathUnsafeCache(PathBuf), + + #[error("IO error while accessing root path: {0}")] + Io(#[from] std::io::Error), +} + +pub struct FileCache { + root_path: PathBuf, + map: scc::HashMap, + file_generator: AtomicUsize, +} + +impl FileCache { + // Dangerous: Changing this constant may cause the program to treat existing cache directories + // as invalid, and thus provide a worse user experience. Do not change this unless you have a + // very good reason to do so. Changing this will break backwards compatibility with existing + // cache directories. + const GITFS_MARKER_FILE: &'static str = ".gitfs_cache"; + + /// Try to create a new file cache at the given path. + /// + /// If the path exists, it must either be an empty directory, or a directory which was + /// previously used as a cache for this program. + pub async fn new(file_path: &Path) -> Result { + let mut pbuf = match tokio::fs::canonicalize(file_path).await { + Ok(mut p) => { + if !tokio::fs::metadata(&p).await?.is_dir() { + return Err(InvalidRootPathError::NotADirectory(p)); + } + + let mut entries = tokio::fs::read_dir(&p).await?; + let is_empty = entries.next_entry().await?.is_none(); + + p.push(Self::GITFS_MARKER_FILE); + let marker_exists = tokio::fs::try_exists(&p).await?; + p.pop(); + + if !(is_empty || marker_exists) { + return Err(InvalidRootPathError::RootPathUnsafeCache(p)); + } + + io::remove_dir_contents(&p).await?; + + Ok(p) + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + tokio::fs::create_dir_all(file_path).await?; + tokio::fs::canonicalize(file_path).await + } + Err(e) => return Err(e.into()), + }?; + + // Create marker file so that subsequent restarts of this application gracefully handle the + // existing cache directory. + pbuf.push(Self::GITFS_MARKER_FILE); + tokio::fs::OpenOptions::new() + .create(true) + .write(true) + .open(&pbuf) + .await?; + pbuf.pop(); + + Ok(Self { + root_path: pbuf, + map: scc::HashMap::new(), + file_generator: AtomicUsize::new(0), + }) + } +} + +impl AsyncReadableCache> for FileCache { + async fn get(&self, key: &K) -> Option> { + let mut file = { + let entry = self.map.get_async(key).await?; + let path = self.root_path.join(entry.get().to_string()); + tokio::fs::File::open(&path).await.ok()? 
+ }; + + let mut buf = Vec::new(); + file.read_to_end(&mut buf).await.ok()?; + Some(buf) + } + + async fn contains(&self, key: &K) -> bool { + self.map.contains_async(key).await + } +} diff --git a/lib/cache/mod.rs b/lib/cache/mod.rs new file mode 100644 index 0000000..1bb3741 --- /dev/null +++ b/lib/cache/mod.rs @@ -0,0 +1,3 @@ +pub mod eviction; +pub mod fcache; +pub mod traits; diff --git a/lib/cache/traits.rs b/lib/cache/traits.rs new file mode 100644 index 0000000..8e167be --- /dev/null +++ b/lib/cache/traits.rs @@ -0,0 +1,86 @@ +use std::{future::Future, hash::Hash, pin::Pin}; + +pub trait ReadableCache +where + K: Eq + Hash, + V: Clone, +{ + type Fut<'a, T: 'a>: Future + 'a + where + Self: 'a, + K: 'a, + V: 'a; + + fn get<'a>(&'a self, key: &'a K) -> Self::Fut<'a, Option> + where + V: 'a; + fn contains<'a>(&'a self, key: &'a K) -> Self::Fut<'a, bool> + where + V: 'a; +} + +pub trait SyncReadableCache +where + K: Eq + Hash, + V: Clone, +{ + fn get(&self, key: &K) -> Option; + fn contains(&self, key: &K) -> bool; +} + +pub trait AsyncReadableCache +where + K: Eq + Hash, + V: Clone, +{ + async fn get(&self, key: &K) -> Option; + async fn contains(&self, key: &K) -> bool; +} + +impl> !SyncReadableCache for T {} + +impl> ReadableCache for T { + type Fut<'a, O: 'a> + = Pin + 'a>> + where + Self: 'a, + K: 'a, + V: 'a; + + fn get<'a>(&'a self, key: &'a K) -> Self::Fut<'a, Option> + where + V: 'a, + { + Box::pin(AsyncReadableCache::get(self, key)) + } + + fn contains<'a>(&'a self, key: &'a K) -> Self::Fut<'a, bool> + where + V: 'a, + { + Box::pin(AsyncReadableCache::contains(self, key)) + } +} + +impl> ReadableCache for T { + type Fut<'a, O: 'a> + = std::future::Ready + where + Self: 'a, + K: 'a, + V: 'a; + + fn get<'a>(&'a self, key: &'a K) -> Self::Fut<'a, Option> + where + V: 'a, + { + std::future::ready(SyncReadableCache::get(self, key)) + } + + fn contains<'a>(&'a self, key: &'a K) -> Self::Fut<'a, bool> + where + V: 'a, + { + std::future::ready(SyncReadableCache::contains(self, key)) + } +} diff --git a/lib/io.rs b/lib/io.rs new file mode 100644 index 0000000..95a5d97 --- /dev/null +++ b/lib/io.rs @@ -0,0 +1,17 @@ +//! Random IO utilities + +use std::path::Path; + +/// Remove all files and directories in the given directory, but not the directory itself. +pub async fn remove_dir_contents(path: &Path) -> std::io::Result<()> { + let mut entries = tokio::fs::read_dir(path).await?; + while let Some(entry) = entries.next_entry().await? { + let path = entry.path(); + if tokio::fs::metadata(&path).await?.is_dir() { + tokio::fs::remove_dir_all(path).await?; + } else { + tokio::fs::remove_file(path).await?; + } + } + Ok(()) +} diff --git a/lib/lib.rs b/lib/lib.rs new file mode 100644 index 0000000..058aae4 --- /dev/null +++ b/lib/lib.rs @@ -0,0 +1,9 @@ +//! git-fs shared library. + +#![feature(negative_impls)] +#![feature(with_negative_coherence)] +#![feature(fn_traits)] + +/// Caching primitives for git-fs. 
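+///
+/// The traits in `cache::traits` let callers stay generic over synchronous and
+/// asynchronous cache implementations alike. A sketch of that usage (the
+/// `count_hits` helper and the `u64`/`String` types are illustrative only):
+///
+/// ```ignore
+/// use gitfs::cache::traits::ReadableCache; // `gitfs` is this crate's assumed name
+///
+/// // Any `SyncReadableCache` or `AsyncReadableCache` implementor satisfies this
+/// // bound through the blanket impls, so one helper serves both kinds of cache.
+/// async fn count_hits<C: ReadableCache<u64, String>>(cache: &C, keys: &[u64]) -> usize {
+///     let mut hits = 0;
+///     for key in keys {
+///         if cache.contains(key).await {
+///             hits += 1;
+///         }
+///     }
+///     hits
+/// }
+/// ```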
+pub mod cache; +pub mod io; From b5594f97b47cdb8b444fb754d251a70be2e0a371 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 13 Feb 2026 11:01:17 -0800 Subject: [PATCH 02/33] hookup LRU cache to fcache.rs --- lib/cache/eviction/lru.rs | 8 ++++---- lib/cache/fcache.rs | 41 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/lib/cache/eviction/lru.rs b/lib/cache/eviction/lru.rs index 7e21cda..0faa7fc 100644 --- a/lib/cache/eviction/lru.rs +++ b/lib/cache/eviction/lru.rs @@ -185,24 +185,24 @@ impl LruEvictionTracker { /// Notify the LRU eviction tracker that the given key was inserted. /// /// You MUST call this method for every new key that is inserted into the cache. - pub async fn insert(&mut self, key: K) { + pub async fn insert(&self, key: K) { self.send_msg(Message::Inserted(key)).await; } /// Notify the LRU eviction tracker that the given key was accessed. /// /// You MUST call this method for every read or update to a key. - pub async fn access(&mut self, key: K) { + pub async fn access(&self, key: K) { self.send_msg(Message::Accessed(key)).await; } /// Cull the least recently used keys with the given deletion method. - pub async fn cull(&mut self, max_count: usize) { + pub async fn cull(&self, max_count: usize) { self.send_msg(Message::Evict(max_count)).await; } /// Send a message to the worker. This is a helper method to reduce code duplication. - async fn send_msg(&mut self, message: Message) { + async fn send_msg(&self, message: Message) { if let Err(_) = self.worker_message_sender.send(message.clone()).await { unreachable!(); } diff --git a/lib/cache/fcache.rs b/lib/cache/fcache.rs index 388d591..6c04d5d 100644 --- a/lib/cache/fcache.rs +++ b/lib/cache/fcache.rs @@ -6,11 +6,35 @@ use std::{ sync::atomic::AtomicUsize, }; -use crate::{cache::traits::AsyncReadableCache, io}; +use crate::{ + cache::{ + eviction::lru::{Deleter, LruEvictionTracker}, + traits::AsyncReadableCache, + }, + io, +}; use thiserror::Error; use tokio::io::AsyncReadExt; +struct LruEntryDeleter; + +impl LruEntryDeleter { + fn new() -> Self { + Self + } +} + +impl Deleter for LruEntryDeleter { + fn delete<'a>(&mut self, keys: impl Iterator) + where + K: 'a, + { + // TODO(markovejnovic): Implement this. + unimplemented!() + } +} + #[derive(Debug, Error)] pub enum InvalidRootPathError { #[error("Root path is not a directory: {0}")] @@ -27,9 +51,15 @@ pub struct FileCache { root_path: PathBuf, map: scc::HashMap, file_generator: AtomicUsize, + lru_tracker: LruEvictionTracker, } -impl FileCache { +impl FileCache { + // How many cache entries to evict at most in a single batch. + // + // Not really sure how to determine this number. + const LRU_EVICTION_MAX_BATCH_SIZE: usize = 32; + // Dangerous: Changing this constant may cause the program to treat existing cache directories // as invalid, and thus provide a worse user experience. Do not change this unless you have a // very good reason to do so. 
Changing this will break backwards compatibility with existing @@ -83,17 +113,22 @@ impl FileCache { root_path: pbuf, map: scc::HashMap::new(), file_generator: AtomicUsize::new(0), + lru_tracker: LruEvictionTracker::spawn( + LruEntryDeleter::new(), + Self::LRU_EVICTION_MAX_BATCH_SIZE, + ), }) } } -impl AsyncReadableCache> for FileCache { +impl AsyncReadableCache> for FileCache { async fn get(&self, key: &K) -> Option> { let mut file = { let entry = self.map.get_async(key).await?; let path = self.root_path.join(entry.get().to_string()); tokio::fs::File::open(&path).await.ok()? }; + self.lru_tracker.access(*key).await; let mut buf = Vec::new(); file.read_to_end(&mut buf).await.ok()?; From 26ecc8006789ae143421aa5d8dc17d10619c2226 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 13 Feb 2026 14:15:38 -0800 Subject: [PATCH 03/33] before claude --- Cargo.lock | 1 + Cargo.toml | 1 + lib/cache/fcache.rs | 284 ++++++++++++++++++++++++++++++++++++++++---- lib/cache/traits.rs | 141 ++++++++++++++++++++++ 4 files changed, 402 insertions(+), 25 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ebbe571..93ac821 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -788,6 +788,7 @@ dependencies = [ "reqwest-middleware", "rustc-hash", "scc", + "scopeguard", "secrecy", "self_update", "semver", diff --git a/Cargo.toml b/Cargo.toml index b6fe250..5ea9bb9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,6 +51,7 @@ opentelemetry_sdk = { version = "0.29", features = ["rt-tokio"] } opentelemetry-otlp = { version = "0.29", default-features = false, features = ["http-proto", "trace", "reqwest-blocking-client"] } tracing-opentelemetry = { version = "0.30" } hashlink = "0.11.0" +scopeguard = "1.2.0" [features] default = [] diff --git a/lib/cache/fcache.rs b/lib/cache/fcache.rs index 6c04d5d..214cad5 100644 --- a/lib/cache/fcache.rs +++ b/lib/cache/fcache.rs @@ -1,56 +1,121 @@ -//! File-based cache. +//! File-based MT-safe, async-safe data cache. use std::{ + fmt::Debug, hash::Hash, path::{Path, PathBuf}, - sync::atomic::AtomicUsize, + sync::{Arc, atomic::AtomicUsize}, }; +use tracing::error; + use crate::{ cache::{ eviction::lru::{Deleter, LruEvictionTracker}, - traits::AsyncReadableCache, + traits::{AsyncReadableCache, AsyncWritableCache}, }, io, }; use thiserror::Error; -use tokio::io::AsyncReadExt; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; -struct LruEntryDeleter; +struct LruEntryDeleter { + file_cache: Arc>, +} -impl LruEntryDeleter { - fn new() -> Self { - Self +impl LruEntryDeleter { + fn new(file_cache: Arc>) -> Self { + Self { file_cache } } } -impl Deleter for LruEntryDeleter { +impl Deleter for LruEntryDeleter { + /// The LRU cache will call this method when it wants to evict keys. fn delete<'a>(&mut self, keys: impl Iterator) where K: 'a, { - // TODO(markovejnovic): Implement this. - unimplemented!() + for key in keys { + // Deleting keys should be atomic from scc::HashMap, which means that all subsequent + // lookups on the same key will fail, and thus we'll be a-ok. + if let Some(entry) = self.file_cache.map.remove_sync(key) { + // On the other hand, deleting the file may fail for a variety of reasons, but + // that's all mostly managed by the delete_entry_from_disk_sync method, which + // gracefully handles the case where the file is already deleted. + self.file_cache.delete_entry_from_disk_sync(&entry.1); + } + } } } +/// Error thrown during construction of a FileCache which describes why the given root path is +/// invalid. 
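+///
+/// A sketch of how a caller might react per variant (the fallback policy shown is
+/// illustrative, not prescribed by this type):
+///
+/// ```ignore
+/// use gitfs::cache::fcache::{FileCache, InvalidRootPathError};
+/// use std::path::Path;
+///
+/// async fn open_cache(path: &Path) -> Option<FileCache<u64>> {
+///     // 1 GiB soft cap; both the cap and the `u64` key type are assumptions.
+///     match FileCache::new(path, 1 << 30).await {
+///         Ok(cache) => Some(cache),
+///         // A non-empty directory without the marker file: refuse to reuse it
+///         // rather than risk clobbering foreign data.
+///         Err(InvalidRootPathError::RootPathUnsafeCache(p)) => {
+///             eprintln!("refusing to reuse {}: not a gitfs cache", p.display());
+///             None
+///         }
+///         Err(e) => {
+///             eprintln!("cache unavailable: {e}");
+///             None
+///         }
+///     }
+/// }
+/// ```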
#[derive(Debug, Error)] pub enum InvalidRootPathError { + /// The root path exists but isn't a directory. #[error("Root path is not a directory: {0}")] NotADirectory(PathBuf), + /// The root path exists and is a directory, but it isn't empty and the data in it appears to + /// come from a different source than this application. #[error("Root path appears to contain data stemming from sources different to this app: {0}")] RootPathUnsafeCache(PathBuf), + /// An IO error occurred while trying to access the root path. #[error("IO error while accessing root path: {0}")] Io(#[from] std::io::Error), } +/// Error thrown during insertion into the cache. +#[derive(Debug, Error)] +pub enum CacheWriteError { + /// An IO error occurred while trying to write the new value to disk. + #[error("IO error while inserting into cache: {0}")] + Io(#[from] std::io::Error), +} + +#[derive(Debug, Clone, Copy)] +struct CacheMapEntry { + /// A unique ID representing the name of the file on disk where the value for this cache entry + /// is stored. This is just an integer that is used to generate the file path for the cache + /// entry. + fid: usize, + + /// The size of the value for this cache entry in bytes. This is used to keep track of the + /// total size of the cache, and to evict entries when necessary. + size_bytes: usize, +} + +/// A general-purpose file-backed cache. This cache is designed to store arbitrary byte values on +/// disk, and to retrieve them later using a key. +/// +/// This cache is considered thread-safe and async-safe. pub struct FileCache { + /// The root path for the cache. This is the directory where all cache files will be stored. root_path: PathBuf, - map: scc::HashMap, + + /// The main map of the cache. This maps keys to file IDs, which are just integers that + /// correspond to file paths on disk. The file ID is used to generate the file path for the + /// cache entry. + map: scc::HashMap, + + /// Generates unique file paths for new cache entries. This is just a counter that increments + /// for each new file, and the file ID is the value of the counter at the time of creation. file_generator: AtomicUsize, + + /// Rough estimate of the current size of the cache in bytes. Not exact, but should be close + /// enough for eviction purposes. + size_bytes: AtomicUsize, + + /// The maximum size of the cache in bytes. This is used to determine when to evict entries + /// from the cache. This is a soft hint, rather than a hard limit, and the cache may + /// temporarily exceed this size during insertions, but it will try to evict entries as soon as + /// possible to get back under this limit. + max_size_bytes: usize, + + /// LRU eviction tracker. This is used to track the least recently used keys in the cache, and + /// to evict keys when necessary. lru_tracker: LruEvictionTracker, } @@ -66,11 +131,28 @@ impl FileCache { // cache directories. const GITFS_MARKER_FILE: &'static str = ".gitfs_cache"; + // The total maximum number of times we will try to read a "deleted" file before giving up and + // treating it as a hard error. + // + // See usage for reasoning on why this is necessary. + const MAX_READ_RETRY_COUNT: usize = 8; + /// Try to create a new file cache at the given path. /// - /// If the path exists, it must either be an empty directory, or a directory which was - /// previously used as a cache for this program. 
- pub async fn new(file_path: &Path) -> Result { + /// + /// # Args + /// + /// * `file_path` - If the path exists, it must either be an empty directory, or a directory + /// which was previously used as a cache for this program. + /// * `max_size_bytes` - The maximum size of the cache in bytes. This is used to determine when + /// to evict entries from the cache. This is a soft hint, rather than a + /// hard limit, and the cache may temporarily exceed this size during + /// insertions, but it will try to evict entries as soon as possible to + /// get back under this limit. + pub async fn new( + file_path: &Path, + max_size_bytes: usize, + ) -> Result { let mut pbuf = match tokio::fs::canonicalize(file_path).await { Ok(mut p) => { if !tokio::fs::metadata(&p).await?.is_dir() { @@ -113,29 +195,181 @@ impl FileCache { root_path: pbuf, map: scc::HashMap::new(), file_generator: AtomicUsize::new(0), + size_bytes: AtomicUsize::new(0), + max_size_bytes, lru_tracker: LruEvictionTracker::spawn( LruEntryDeleter::new(), Self::LRU_EVICTION_MAX_BATCH_SIZE, ), }) } + + /// Retrieve the correct path for the given file ID. + fn path_for(&self, fid: usize) -> PathBuf { + self.root_path.join(fid.to_string()) + } + + async fn create_file(&self, path: &Path) -> Result { + tokio::fs::OpenOptions::new() + .create(true) + .write(true) + .open(path) + .await + } + + /// Given an entry, remove it from the disk and update the size of the cache accordingly. + /// + /// Gracefully handles the case where the file doesn't exist, which can happen if the file was + /// deleted after we read the file ID from the map, but before we tried to delete + /// the file. + async fn delete_entry_from_disk_async(&self, entry: &CacheMapEntry) -> std::io::Result<()> { + match tokio::fs::remove_file(self.path_for(entry.fid)).await { + Ok(()) => { + self.size_bytes + .fetch_sub(entry.size_bytes, std::sync::atomic::Ordering::Relaxed); + Ok(()) + } + // Note that there's a race condition between the deleter and the mutator in + // AsyncWritableCache. Either one of these operations atomically grabs the file ID (so + // accessing the file ID is safe), but then they race to delete the file. + // + // Consequently, it's quite possible that the file is already deleted by the time we + // try to delete it, and that's already fine with us, so we just treat that case as a + // successful deletion. + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()), + Err(e) => Err(e), + } + } + + /// Given an entry, remove it from the disk and update the size of the cache accordingly. + /// + /// Gracefully handles the case where the file doesn't exist, which can happen if the file was + /// deleted after we read the file ID from the map, but before we tried to delete + /// the file. + fn delete_entry_from_disk_sync(&self, entry: &CacheMapEntry) -> std::io::Result<()> { + match std::fs::remove_file(self.path_for(entry.fid)) { + Ok(()) => { + self.size_bytes + .fetch_sub(entry.size_bytes, std::sync::atomic::Ordering::Relaxed); + Ok(()) + } + // Note that there's a race condition between the deleter and the mutator in + // AsyncWritableCache. Either one of these operations atomically grabs the file ID (so + // accessing the file ID is safe), but then they race to delete the file. + // + // Consequently, it's quite possible that the file is already deleted by the time we + // try to delete it, and that's already fine with us, so we just treat that case as a + // successful deletion. 
+ Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()), + Err(e) => Err(e), + } + } } -impl AsyncReadableCache> for FileCache { +impl AsyncReadableCache> for FileCache { async fn get(&self, key: &K) -> Option> { - let mut file = { - let entry = self.map.get_async(key).await?; - let path = self.root_path.join(entry.get().to_string()); - tokio::fs::File::open(&path).await.ok()? - }; - self.lru_tracker.access(*key).await; + for _ in 0..Self::MAX_READ_RETRY_COUNT { + // Grab the best-guess file ID for the given key. If this fails, then the key is not in + // the cache, and we can return `None`. + let entry = *(self.map.get_async(key).await?); + + // The next thing we need to do is try to open the file. This may fail for a plethora + // of reasons. + // + // TODO(markovejnovic): path_for allocates on heap, could be on stack. + let mut file = match tokio::fs::File::open(self.path_for(entry.fid)).await { + Ok(file) => file, + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + // The file was not found. + // This may happen for two reasons: + // + // There is a bug in the code. + // + // The file was id was deleted after we read the file ID from the map on line: + // + // let fid: usize = *(self.map.get_async(key).await?); + // + // The file can get deleted as part of the insert procedure, or the deletion + // procedure, so we need to try to re-read the file. + continue; + } + Err(e) => { + error!(error = ?e, key = ?key, "IO error while opening file for cache key"); + + // This MIGHT be recoverable. The IO error may very well be transient, so we'll + // just say we don't have this value and then pray that the user will perform + // an .insert + return None; + } + }; - let mut buf = Vec::new(); - file.read_to_end(&mut buf).await.ok()?; - Some(buf) + let mut buf = Vec::new(); + file.read_to_end(&mut buf) + .await + .inspect_err(|e| { + error!(error = ?e, key = ?key, "IO error while reading file for cache key"); + }) + .ok()?; + self.lru_tracker.access(*key).await; + return Some(buf); + } + + error!(key = ?key, attempt_count = Self::MAX_READ_RETRY_COUNT, "could not find file."); + + // Again, might be recoverable if the user calls an .insert later on. + None } async fn contains(&self, key: &K) -> bool { self.map.contains_async(key).await } } + +impl AsyncWritableCache, CacheWriteError> + for FileCache +{ + async fn insert(&self, key: &K, value: Vec) -> Result<(), CacheWriteError> { + // Inserts are tricky. The first thing we'll do is find a new file handle for the new + // value. + let new_entry = CacheMapEntry { + fid: self + .file_generator + .fetch_add(1, std::sync::atomic::Ordering::Relaxed), + size_bytes: value.len(), + }; + + if self.size_bytes.load(std::sync::atomic::Ordering::Relaxed) + new_entry.size_bytes + > self.max_size_bytes + { + // We need to evict some entries before we can insert this new entry. Let's just evict + // a batch of entries. + self.lru_tracker + .cull(Self::LRU_EVICTION_MAX_BATCH_SIZE) + .await; + } + + let mut new_file = self.create_file(&self.path_for(new_entry.fid)).await?; + new_file.write_all(&value).await?; + + self.size_bytes + .fetch_add(new_entry.size_bytes, std::sync::atomic::Ordering::Relaxed); + + // Now we insert the new file ID into the map, and get the old file ID if it exists. + if let Some(old_entry) = self.map.upsert_async(*key, new_entry).await { + // If there was an old file ID, we need to delete the old file and notify the LRU + // tracker that the key was accessed. 
+ self.lru_tracker.access(*key).await; + + // TODO(markovejnovic): Could stack allocate the path. + // Note that the file already may be deleted at this point -- the LRU tracker deleter + // may have already deleted this file, so if the file doesn't exist, that's already + // fine with us. + self.delete_entry_from_disk_async(&old_entry).await?; + } else { + self.lru_tracker.insert(*key).await; + } + + // Epic, we did it. + Ok(()) + } +} diff --git a/lib/cache/traits.rs b/lib/cache/traits.rs index 8e167be..0cb01db 100644 --- a/lib/cache/traits.rs +++ b/lib/cache/traits.rs @@ -1,39 +1,69 @@ use std::{future::Future, hash::Hash, pin::Pin}; +/// A readable cache is a cache that can be read from, but not necessarily written to. +/// +/// This trait is designed to be a generic trait for any cache. When using a cache as a generic +/// parameter, this is the correct trait to use. +/// +/// You should avoid manually implementing this trait, and prefer implementing the +/// `SyncReadableCache` or `AsyncReadableCache` traits instead, which will automatically implement +/// this trait for you. pub trait ReadableCache where K: Eq + Hash, V: Clone, { + /// The future type returned by the `get` and `contains` methods. This is used to allow + /// zero-cost synchronous and asynchronous implementations of the `ReadableCache` trait. type Fut<'a, T: 'a>: Future + 'a where Self: 'a, K: 'a, V: 'a; + /// Fetch the value associated with the given key from the cache. If the key is not present in + /// the cache, return `None`. fn get<'a>(&'a self, key: &'a K) -> Self::Fut<'a, Option> where V: 'a; + + /// Check if the cache contains the given key. fn contains<'a>(&'a self, key: &'a K) -> Self::Fut<'a, bool> where V: 'a; } +/// Convenience trait for implementing `ReadableCache` with synchronous cache accesses. +/// +/// Avoid using this cache as a generic parameter. `ReadableCache` is the correct trait to use for +/// generic parameters, and will support both synchronous and asynchronous implementations of the +/// cache. pub trait SyncReadableCache where K: Eq + Hash, V: Clone, { + /// See `ReadableCache::get`. fn get(&self, key: &K) -> Option; + + /// See `ReadableCache::contains`. fn contains(&self, key: &K) -> bool; } +/// Convenience trait for implementing `ReadableCache` with asynchronous cache accesses. +/// +/// Avoid using this cache as a generic parameter. `ReadableCache` is the correct trait to use for +/// generic parameters, and will support both synchronous and asynchronous implementations of the +/// cache. pub trait AsyncReadableCache where K: Eq + Hash, V: Clone, { + /// See `ReadableCache::get`. async fn get(&self, key: &K) -> Option; + + /// See `ReadableCache::contains`. async fn contains(&self, key: &K) -> bool; } @@ -84,3 +114,114 @@ impl> ReadableCache for std::future::ready(SyncReadableCache::contains(self, key)) } } + +/// A writable cache is a cache that can be written to, but not necessarily read from. +/// +/// Note that this trait also does not require the cache support manual eviction of entries. Many +/// caches only support automatic eviction (such as LRU eviction), and this trait is designed to +/// support those caches as well. +/// +/// This trait is designed to be a generic trait for any cache. When using a cache as a generic +/// parameter, this is the correct trait to use. 
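+///
+/// For instance, a warm-up helper can accept any writable cache through this one
+/// bound (a sketch; the helper and its `u64`/`Vec<u8>` types are illustrative):
+///
+/// ```ignore
+/// use gitfs::cache::traits::WritableCache; // `gitfs` is this crate's assumed name
+///
+/// async fn warm_up<C, E>(cache: &C, entries: Vec<(u64, Vec<u8>)>) -> Result<(), E>
+/// where
+///     C: WritableCache<u64, Vec<u8>, E>,
+///     E: std::error::Error,
+/// {
+///     for (key, value) in entries {
+///         // Resolves through the blanket impls, so sync- and async-backed
+///         // caches are both accepted.
+///         cache.insert(&key, value).await?;
+///     }
+///     Ok(())
+/// }
+/// ```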
+/// +/// You should avoid manually implementing this trait, and prefer implementing the +/// `SyncWritableCache` or `AsyncWritableCache` traits instead, which will automatically implement +/// this trait for you. +pub trait WritableCache +where + K: Eq + Hash, + V: Clone, + E: std::error::Error, +{ + /// The future type returned by the `insert` method. This is used to allow zero-cost + /// synchronous and asynchronous implementations of the `WritableCache` trait. + type Fut<'a, T: 'a>: Future + 'a + where + Self: 'a, + K: 'a, + V: 'a, + E: 'a; + + /// Insert the given value into the cache with the given key. If the key was already present in + /// the cache, return the old value. Otherwise, return `None`. + fn insert<'a>(&'a self, key: &'a K, value: V) -> Self::Fut<'a, Result<(), E>> + where + V: 'a; +} + +/// Convenience trait for implementing `WritableCache` with synchronous cache accesses. +/// +/// Avoid using this cache as a generic parameter. `WritableCache` is the correct trait to use for +/// generic parameters, and will support both synchronous and asynchronous implementations of the +/// cache. +pub trait SyncWritableCache +where + K: Eq + Hash, + V: Clone, + E: std::error::Error, +{ + /// Insert the given value into the cache with the given key. If the key was already present in + /// the cache, return the old value. Otherwise, return `None`. + fn insert(&self, key: &K, value: V) -> Result<(), E>; +} + +/// Convenience trait for implementing `WritableCache` with asynchronous cache accesses. +/// +/// Avoid using this cache as a generic parameter. `WritableCache` is the correct trait to use for +/// generic parameters, and will support both synchronous and asynchronous implementations of the +/// cache. +pub trait AsyncWritableCache +where + K: Eq + Hash, + V: Clone, + E: std::error::Error, +{ + /// Insert the given value into the cache with the given key. If the key was already present in + /// the cache, return the old value. Otherwise, return `None`. + async fn insert(&self, key: &K, value: V) -> Result<(), E>; +} + +impl> + !SyncWritableCache for T +{ +} + +impl> + WritableCache for T +{ + type Fut<'a, O: 'a> + = Pin + 'a>> + where + Self: 'a, + K: 'a, + V: 'a, + E: 'a; + + fn insert<'a>(&'a self, key: &'a K, value: V) -> Self::Fut<'a, Result<(), E>> + where + V: 'a, + E: 'a, + { + Box::pin(AsyncWritableCache::insert(self, key, value)) + } +} + +impl> + WritableCache for T +{ + type Fut<'a, O: 'a> + = std::future::Ready + where + Self: 'a, + K: 'a, + V: 'a, + E: 'a; + + fn insert<'a>(&'a self, key: &'a K, value: V) -> Self::Fut<'a, Result<(), E>> + where + V: 'a, + E: 'a, + { + std::future::ready(SyncWritableCache::insert(self, key, value)) + } +} From a4bc26b3c4863a3b62aa4f21a2595d3a1dd6b785 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 13 Feb 2026 14:33:37 -0800 Subject: [PATCH 04/33] random lint --- lib/cache/eviction/lru.rs | 21 ++++++++++++++++----- lib/cache/mod.rs | 3 +++ lib/cache/traits.rs | 2 ++ 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/lib/cache/eviction/lru.rs b/lib/cache/eviction/lru.rs index 0faa7fc..287038d 100644 --- a/lib/cache/eviction/lru.rs +++ b/lib/cache/eviction/lru.rs @@ -35,7 +35,7 @@ struct LruProcessingTask> { ordered_key_set: LinkedHashSet, /// Tracks which keys are currently considered evicted. Read the long explanation in - /// service_message for why this is necessary. + /// `service_message` for why this is necessary. evicted_keys: HashSet, /// The deleter to call when we need to evict keys. 
@@ -132,14 +132,17 @@ impl> LruProcessingTask { - debug_assert!(!self.ordered_key_set.contains(&k)); + debug_assert!( + !self.ordered_key_set.contains(&k), + "key must not already exist in the ordered set when inserting" + ); // If the key has been evicted, but is now inserted, that means the key is no // longer stale. @@ -156,7 +159,10 @@ impl> LruProcessingTask LruEvictionTracker { /// Send a message to the worker. This is a helper method to reduce code duplication. async fn send_msg(&self, message: Message) { - if let Err(_) = self.worker_message_sender.send(message.clone()).await { + if self + .worker_message_sender + .send(message.clone()) + .await + .is_err() + { unreachable!(); } } diff --git a/lib/cache/mod.rs b/lib/cache/mod.rs index 1bb3741..e0c1c97 100644 --- a/lib/cache/mod.rs +++ b/lib/cache/mod.rs @@ -1,3 +1,6 @@ +/// Cache eviction policies. pub mod eviction; +/// File-backed cache implementation. pub mod fcache; +/// Cache traits for read and write operations. pub mod traits; diff --git a/lib/cache/traits.rs b/lib/cache/traits.rs index 0cb01db..9a3d7ac 100644 --- a/lib/cache/traits.rs +++ b/lib/cache/traits.rs @@ -55,6 +55,7 @@ where /// Avoid using this cache as a generic parameter. `ReadableCache` is the correct trait to use for /// generic parameters, and will support both synchronous and asynchronous implementations of the /// cache. +#[expect(async_fn_in_trait)] pub trait AsyncReadableCache where K: Eq + Hash, @@ -170,6 +171,7 @@ where /// Avoid using this cache as a generic parameter. `WritableCache` is the correct trait to use for /// generic parameters, and will support both synchronous and asynchronous implementations of the /// cache. +#[expect(async_fn_in_trait)] pub trait AsyncWritableCache where K: Eq + Hash, From f8c4952a5a6f82873c52099c2f58d1baff63eebc Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 13 Feb 2026 14:39:57 -0800 Subject: [PATCH 05/33] Get the fcache.rs compiling --- lib/cache/fcache.rs | 181 ++++++++++++++++++++------------------------ 1 file changed, 84 insertions(+), 97 deletions(-) diff --git a/lib/cache/fcache.rs b/lib/cache/fcache.rs index 214cad5..521401b 100644 --- a/lib/cache/fcache.rs +++ b/lib/cache/fcache.rs @@ -18,15 +18,60 @@ use crate::{ }; use thiserror::Error; -use tokio::io::{AsyncReadExt, AsyncWriteExt}; +use tokio::io::{AsyncReadExt as _, AsyncWriteExt as _}; + +struct FileCacheShared { + root_path: PathBuf, + map: scc::HashMap, + size_bytes: AtomicUsize, +} + +impl FileCacheShared { + fn size(&self) -> usize { + self.size_bytes.load(std::sync::atomic::Ordering::Relaxed) + } + + fn path_for(&self, fid: usize) -> PathBuf { + self.root_path.join(fid.to_string()) + } + + /// Given an entry, remove it from the disk and update the size of the cache accordingly. + /// + /// Gracefully handles the case where the file doesn't exist, which can happen if the file was + /// deleted after we read the file ID from the map, but before we tried to delete + /// the file. 
+ async fn delete_entry_from_disk_async(&self, entry: &CacheMapEntry) -> std::io::Result<()> { + match tokio::fs::remove_file(self.path_for(entry.fid)).await { + Ok(()) => { + self.size_bytes + .fetch_sub(entry.size_bytes, std::sync::atomic::Ordering::Relaxed); + Ok(()) + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()), + Err(e) => Err(e), + } + } + + fn delete_entry_from_disk_sync(&self, entry: &CacheMapEntry) -> std::io::Result<()> { + match std::fs::remove_file(self.path_for(entry.fid)) { + Ok(()) => { + self.size_bytes + .fetch_sub(entry.size_bytes, std::sync::atomic::Ordering::Relaxed); + Ok(()) + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()), + Err(e) => Err(e), + } + } +} struct LruEntryDeleter { - file_cache: Arc>, + shared: Arc>, } impl LruEntryDeleter { - fn new(file_cache: Arc>) -> Self { - Self { file_cache } + fn new(shared: Arc>) -> Self { + Self { shared } } } @@ -37,19 +82,16 @@ impl Deleter for LruEntryDeleter K: 'a, { for key in keys { - // Deleting keys should be atomic from scc::HashMap, which means that all subsequent - // lookups on the same key will fail, and thus we'll be a-ok. - if let Some(entry) = self.file_cache.map.remove_sync(key) { - // On the other hand, deleting the file may fail for a variety of reasons, but - // that's all mostly managed by the delete_entry_from_disk_sync method, which - // gracefully handles the case where the file is already deleted. - self.file_cache.delete_entry_from_disk_sync(&entry.1); + if let Some(entry) = self.shared.map.remove_sync(key) + && let Err(e) = self.shared.delete_entry_from_disk_sync(&entry.1) + { + error!(error = ?e, "failed to delete evicted cache entry from disk"); } } } } -/// Error thrown during construction of a FileCache which describes why the given root path is +/// Error thrown during construction of a `FileCache` which describes why the given root path is /// invalid. #[derive(Debug, Error)] pub enum InvalidRootPathError { @@ -92,22 +134,12 @@ struct CacheMapEntry { /// /// This cache is considered thread-safe and async-safe. pub struct FileCache { - /// The root path for the cache. This is the directory where all cache files will be stored. - root_path: PathBuf, - - /// The main map of the cache. This maps keys to file IDs, which are just integers that - /// correspond to file paths on disk. The file ID is used to generate the file path for the - /// cache entry. - map: scc::HashMap, + shared: Arc>, /// Generates unique file paths for new cache entries. This is just a counter that increments /// for each new file, and the file ID is the value of the counter at the time of creation. file_generator: AtomicUsize, - /// Rough estimate of the current size of the cache in bytes. Not exact, but should be close - /// enough for eviction purposes. - size_bytes: AtomicUsize, - /// The maximum size of the cache in bytes. This is used to determine when to evict entries /// from the cache. This is a soft hint, rather than a hard limit, and the cache may /// temporarily exceed this size during insertions, but it will try to evict entries as soon as @@ -119,7 +151,7 @@ pub struct FileCache { lru_tracker: LruEvictionTracker, } -impl FileCache { +impl FileCache { // How many cache entries to evict at most in a single batch. // // Not really sure how to determine this number. @@ -143,12 +175,11 @@ impl FileCache { /// # Args /// /// * `file_path` - If the path exists, it must either be an empty directory, or a directory - /// which was previously used as a cache for this program. 
+ /// which was previously used as a cache for this program. /// * `max_size_bytes` - The maximum size of the cache in bytes. This is used to determine when - /// to evict entries from the cache. This is a soft hint, rather than a - /// hard limit, and the cache may temporarily exceed this size during - /// insertions, but it will try to evict entries as soon as possible to - /// get back under this limit. + /// to evict entries from the cache. This is a soft hint, rather than a hard limit, and the + /// cache may temporarily exceed this size during insertions, but it will try to evict entries + /// as soon as possible to get back under this limit. pub async fn new( file_path: &Path, max_size_bytes: usize, @@ -186,98 +217,53 @@ impl FileCache { pbuf.push(Self::GITFS_MARKER_FILE); tokio::fs::OpenOptions::new() .create(true) + .truncate(true) .write(true) .open(&pbuf) .await?; pbuf.pop(); - Ok(Self { + let shared = Arc::new(FileCacheShared { root_path: pbuf, map: scc::HashMap::new(), - file_generator: AtomicUsize::new(0), size_bytes: AtomicUsize::new(0), + }); + + Ok(Self { + shared: Arc::clone(&shared), + file_generator: AtomicUsize::new(0), max_size_bytes, lru_tracker: LruEvictionTracker::spawn( - LruEntryDeleter::new(), + LruEntryDeleter::new(shared), Self::LRU_EVICTION_MAX_BATCH_SIZE, ), }) } - /// Retrieve the correct path for the given file ID. - fn path_for(&self, fid: usize) -> PathBuf { - self.root_path.join(fid.to_string()) - } - async fn create_file(&self, path: &Path) -> Result { tokio::fs::OpenOptions::new() .create(true) + .truncate(true) .write(true) .open(path) .await } - - /// Given an entry, remove it from the disk and update the size of the cache accordingly. - /// - /// Gracefully handles the case where the file doesn't exist, which can happen if the file was - /// deleted after we read the file ID from the map, but before we tried to delete - /// the file. - async fn delete_entry_from_disk_async(&self, entry: &CacheMapEntry) -> std::io::Result<()> { - match tokio::fs::remove_file(self.path_for(entry.fid)).await { - Ok(()) => { - self.size_bytes - .fetch_sub(entry.size_bytes, std::sync::atomic::Ordering::Relaxed); - Ok(()) - } - // Note that there's a race condition between the deleter and the mutator in - // AsyncWritableCache. Either one of these operations atomically grabs the file ID (so - // accessing the file ID is safe), but then they race to delete the file. - // - // Consequently, it's quite possible that the file is already deleted by the time we - // try to delete it, and that's already fine with us, so we just treat that case as a - // successful deletion. - Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()), - Err(e) => Err(e), - } - } - - /// Given an entry, remove it from the disk and update the size of the cache accordingly. - /// - /// Gracefully handles the case where the file doesn't exist, which can happen if the file was - /// deleted after we read the file ID from the map, but before we tried to delete - /// the file. - fn delete_entry_from_disk_sync(&self, entry: &CacheMapEntry) -> std::io::Result<()> { - match std::fs::remove_file(self.path_for(entry.fid)) { - Ok(()) => { - self.size_bytes - .fetch_sub(entry.size_bytes, std::sync::atomic::Ordering::Relaxed); - Ok(()) - } - // Note that there's a race condition between the deleter and the mutator in - // AsyncWritableCache. Either one of these operations atomically grabs the file ID (so - // accessing the file ID is safe), but then they race to delete the file. 
- // - // Consequently, it's quite possible that the file is already deleted by the time we - // try to delete it, and that's already fine with us, so we just treat that case as a - // successful deletion. - Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()), - Err(e) => Err(e), - } - } } -impl AsyncReadableCache> for FileCache { +impl AsyncReadableCache> + for FileCache +{ async fn get(&self, key: &K) -> Option> { for _ in 0..Self::MAX_READ_RETRY_COUNT { // Grab the best-guess file ID for the given key. If this fails, then the key is not in // the cache, and we can return `None`. - let entry = *(self.map.get_async(key).await?); + let entry = *(self.shared.map.get_async(key).await?); // The next thing we need to do is try to open the file. This may fail for a plethora // of reasons. // // TODO(markovejnovic): path_for allocates on heap, could be on stack. - let mut file = match tokio::fs::File::open(self.path_for(entry.fid)).await { + let mut file = match tokio::fs::File::open(self.shared.path_for(entry.fid)).await { Ok(file) => file, Err(e) if e.kind() == std::io::ErrorKind::NotFound => { // The file was not found. @@ -321,11 +307,11 @@ impl AsyncReadableCache } async fn contains(&self, key: &K) -> bool { - self.map.contains_async(key).await + self.shared.map.contains_async(key).await } } -impl AsyncWritableCache, CacheWriteError> +impl AsyncWritableCache, CacheWriteError> for FileCache { async fn insert(&self, key: &K, value: Vec) -> Result<(), CacheWriteError> { @@ -338,9 +324,7 @@ impl AsyncWritableCache, CacheW size_bytes: value.len(), }; - if self.size_bytes.load(std::sync::atomic::Ordering::Relaxed) + new_entry.size_bytes - > self.max_size_bytes - { + if self.shared.size() + new_entry.size_bytes > self.max_size_bytes { // We need to evict some entries before we can insert this new entry. Let's just evict // a batch of entries. self.lru_tracker @@ -348,14 +332,17 @@ impl AsyncWritableCache, CacheW .await; } - let mut new_file = self.create_file(&self.path_for(new_entry.fid)).await?; + let mut new_file = self + .create_file(&self.shared.path_for(new_entry.fid)) + .await?; new_file.write_all(&value).await?; - self.size_bytes + self.shared + .size_bytes .fetch_add(new_entry.size_bytes, std::sync::atomic::Ordering::Relaxed); // Now we insert the new file ID into the map, and get the old file ID if it exists. - if let Some(old_entry) = self.map.upsert_async(*key, new_entry).await { + if let Some(old_entry) = self.shared.map.upsert_async(*key, new_entry).await { // If there was an old file ID, we need to delete the old file and notify the LRU // tracker that the key was accessed. self.lru_tracker.access(*key).await; @@ -364,7 +351,7 @@ impl AsyncWritableCache, CacheW // Note that the file already may be deleted at this point -- the LRU tracker deleter // may have already deleted this file, so if the file doesn't exist, that's already // fine with us. 
- self.delete_entry_from_disk_async(&old_entry).await?; + self.shared.delete_entry_from_disk_async(&old_entry).await?; } else { self.lru_tracker.insert(*key).await; } From fc8bb4757581708480e98c535acc60ed981c3379 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 13 Feb 2026 14:40:30 -0800 Subject: [PATCH 06/33] remove dumb stuff --- Cargo.lock | 1 - Cargo.toml | 1 - 2 files changed, 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 93ac821..ebbe571 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -788,7 +788,6 @@ dependencies = [ "reqwest-middleware", "rustc-hash", "scc", - "scopeguard", "secrecy", "self_update", "semver", diff --git a/Cargo.toml b/Cargo.toml index 5ea9bb9..b6fe250 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,7 +51,6 @@ opentelemetry_sdk = { version = "0.29", features = ["rt-tokio"] } opentelemetry-otlp = { version = "0.29", default-features = false, features = ["http-proto", "trace", "reqwest-blocking-client"] } tracing-opentelemetry = { version = "0.30" } hashlink = "0.11.0" -scopeguard = "1.2.0" [features] default = [] From 97c647bf6e312fb38701a0ae43b2683372cb7bf9 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 13 Feb 2026 17:03:44 -0800 Subject: [PATCH 07/33] address clanker comments --- lib/cache/eviction/lru.rs | 300 +++++++++++++++++++++++++------------- lib/cache/fcache.rs | 109 ++++++++------ 2 files changed, 265 insertions(+), 144 deletions(-) diff --git a/lib/cache/eviction/lru.rs b/lib/cache/eviction/lru.rs index 287038d..f821bc3 100644 --- a/lib/cache/eviction/lru.rs +++ b/lib/cache/eviction/lru.rs @@ -1,19 +1,68 @@ //! Implements the LRU eviction policy. -use std::{collections::HashSet, future::Future, hash::Hash, pin::Pin, sync::Arc}; +use std::{ + collections::HashSet, + future::Future, + hash::Hash, + pin::Pin, + sync::{Arc, OnceLock, atomic::AtomicU64}, +}; use hashlink::LinkedHashSet; use tokio::{sync::mpsc::Receiver, task::JoinHandle}; +#[derive(Debug)] +struct DeletionIndicator { + underlying: AtomicU64, +} + +impl DeletionIndicator { + /// Call to mark the start of a batch of deletions. + fn submit_batch(&self) { + self.underlying + .fetch_add(1 << 32, std::sync::atomic::Ordering::Relaxed); + } + + /// Call to mark a batch as being processed right now. + fn process_batch(&self, count: u32) { + self.underlying.fetch_add( + count as u64 - (1 << 32), + std::sync::atomic::Ordering::Relaxed, + ); + } + + /// Call to mark a deletion as being completed. + fn observe_deletion(&self) { + self.underlying + .fetch_sub(1, std::sync::atomic::Ordering::Relaxed); + } + + /// Check if there are any scheduled or active deletions. + fn have_pending_deletions(&self) -> bool { + self.underlying.load(std::sync::atomic::Ordering::Relaxed) == 0 + } + + /// Check if there are any scheduled batches of deletions. + fn have_pending_batches(&self) -> bool { + self.underlying.load(std::sync::atomic::Ordering::Relaxed) >= 1 << 32 + } +} + +impl Default for DeletionIndicator { + fn default() -> Self { + Self { + underlying: AtomicU64::new(0), + } + } +} + /// A trait for deleting keys from the cache. This is used by the LRU eviction tracker to delete /// keys when they are evicted. -pub trait Deleter: Send + 'static { +pub trait Deleter: Send + Clone + 'static { /// Delete the given keys from the cache. The keys are guaranteed to be in the order of /// eviction. You absolutely MUST delete the keys in the order they are given, otherwise the /// LRU eviction tracker will get very confused and break. 
- fn delete<'a>(&mut self, keys: impl Iterator) - where - K: 'a; + fn delete(&mut self, key: K) -> impl Future + Send; } /// Messages sent to the LRU eviction tracker worker. @@ -22,7 +71,7 @@ enum Message { /// Notify the LRU eviction tracker that the given key was accessed. Accessed(K), /// Request an eviction set of the given size. - Evict(usize), + Evict(u32), /// Notify the LRU eviction tracker that a given key was inserted. Inserted(K), } @@ -40,122 +89,132 @@ struct LruProcessingTask> { /// The deleter to call when we need to evict keys. deleter: D, + + /// Pointer into the shared deletion tracker. + shared: Arc, } impl> LruProcessingTask { - fn new(deleter: D, receiver: Receiver>) -> Self { + fn new(deleter: D, receiver: Receiver>, shared: Arc) -> Self { Self { receiver, ordered_key_set: LinkedHashSet::new(), evicted_keys: HashSet::new(), deleter, + shared, } } - fn spawn_task(deleter: D, receiver: Receiver>) -> JoinHandle<()> { + fn spawn_task( + deleter: D, + receiver: Receiver>, + shared: Arc, + ) -> JoinHandle<()> { // TODO(markovejnovic): This should have a best-effort drop. tokio::spawn(async move { - let mut task = Self::new(deleter, receiver); + let mut task = Self::new(deleter, receiver, shared); task.work().await; }) } async fn work(&mut self) { while let Some(msg) = self.receiver.recv().await - && self.service_message(msg).await + && self.service_message(msg) {} } /// Returns true if the task should continue working. #[must_use] - fn service_message( - &mut self, - message: Message, - ) -> Pin + Send + '_>> { - Box::pin(async move { - match message { - Message::Accessed(k) => { - if self.evicted_keys.contains(&k) { - // This is a ghost access, happens when a client accesses a key that may not - // have been cleaned up yet. Just ignore it. - return true; - } - - self.reposition_existing_key(k); + fn service_message(&mut self, message: Message) -> bool { + match message { + Message::Accessed(k) => { + if self.evicted_keys.contains(&k) { + // This is a ghost access, happens when a client accesses a key that may not + // have been cleaned up yet. Just ignore it. + return true; } - Message::Evict(max_count) => { - // Before we send off the eviction set, we actually need to remove the evicted - // keys from the pending queue. Read the following for an explanation. - // - // The client may access a key AFTER an eviction notice. Until the eviction - // notice is processed by the client, the client will have the key available to - // it. - // - // Consequently, the client may very well access the key after it sent out an - // eviction notice, but before the eviction notice is processed. This will - // result in the key being added as an access message after the eviction - // notice, but before the eviction set is sent to the client. - // - // The problem is, the LruProcessingTask has no way of knowing whether the - // message the client sent is a stale message, or a legit message. - // - // Consider the following queue state in between two loop iterations of the - // LruProcessingTask: - // A -- access - // B -- insert - // E -- eviction request - // C -- the client thread which made the request - // - // A1 A2 A3 E1 A1 B1 -- after this point, no clients can access A1 - // C1 C1 C1 C1 C2 C1 ---------------------------------------------> time - // - // The scenario here is: - // - A1, A2, A3 are used by the clients. - // - C1 wants to add B1, but it doesn't have enough space, so it sends an - // eviction request. - // - In parallel, C2 accesses A1, which is completely legal. 
- // - // The result is that our queue has A1, even after we sent out the eviction - // request, and we have no way of knowing whether A1 is a stale message or not. - // - // To prevent this whole "is this thing stale or not" problem, we need to - // "erase" keys evicted in the future. Unfortunately, you can't mutate queues, - // so the best we can do is simply mark the stupid keys as "evicted" and ignore - // any accesses from these keys until they're marked as "inserted" again. - // - // We cannot mutate the queue to mark messages as stale, so the best we can do - // is mark the keys as "evicted" and ignore any accesses from these keys until - // they're marked as "inserted" again. - { - let take_count = self.ordered_key_set.len().min(max_count); - let eviction_set_it = self.ordered_key_set.iter().take(take_count); - self.evicted_keys.extend(eviction_set_it.clone()); - self.deleter.delete(eviction_set_it); - for _ in 0..take_count { - if self.ordered_key_set.pop_front().is_none() { - break; - } - } + + self.reposition_existing_key(k); + } + Message::Evict(max_count) => { + // Before we send off the eviction set, we actually need to remove the evicted + // keys from the pending queue. Read the following for an explanation. + // + // The client may access a key AFTER an eviction notice. Until the eviction + // notice is processed by the client, the client will have the key available to + // it. + // + // Consequently, the client may very well access the key after it sent out an + // eviction notice, but before the eviction notice is processed. This will + // result in the key being added as an access message after the eviction + // notice, but before the eviction set is sent to the client. + // + // The problem is, the LruProcessingTask has no way of knowing whether the + // message the client sent is a stale message, or a legit message. + // + // Consider the following queue state in between two loop iterations of the + // LruProcessingTask: + // A -- access + // B -- insert + // E -- eviction request + // C -- the client thread which made the request + // + // A1 A2 A3 E1 A1 B1 -- after this point, no clients can access A1 + // C1 C1 C1 C1 C2 C1 ---------------------------------------------> time + // + // The scenario here is: + // - A1, A2, A3 are used by the clients. + // - C1 wants to add B1, but it doesn't have enough space, so it sends an + // eviction request. + // - In parallel, C2 accesses A1, which is completely legal. + // + // The result is that our queue has A1, even after we sent out the eviction + // request, and we have no way of knowing whether A1 is a stale message or not. + // + // To prevent this whole "is this thing stale or not" problem, we need to + // "erase" keys evicted in the future. Unfortunately, you can't mutate queues, + // so the best we can do is simply mark the stupid keys as "evicted" and ignore + // any accesses from these keys until they're marked as "inserted" again. + // + // We cannot mutate the queue to mark messages as stale, so the best we can do + // is mark the keys as "evicted" and ignore any accesses from these keys until + // they're marked as "inserted" again. + { + // These integer casts are safe, since max_count is guaranteed to fit + // within a u32, min(MAX_U32, MAX_USIZE) == MAX_U32. 
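+                    // Worked example: with max_count = 8 and only 3 keys queued,
+                    // take_count = min(3, 8) = 3; with 100 keys queued, take_count = 8.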
+ let take_count = self.ordered_key_set.len().min(max_count as usize) as u32; + self.shared.active_deletions.process_batch(take_count); + for _ in 0..take_count { + let Some(k) = self.ordered_key_set.pop_front() else { + break; + }; + self.evicted_keys.insert(k); + let mut deleter = self.deleter.clone(); + let shared_clone = self.shared.clone(); + tokio::spawn(async move { + deleter.delete(k).await; + shared_clone.active_deletions.observe_deletion(); + }); } } - Message::Inserted(k) => { - debug_assert!( - !self.ordered_key_set.contains(&k), - "key must not already exist in the ordered set when inserting" - ); - - // If the key has been evicted, but is now inserted, that means the key is no - // longer stale. - if self.evicted_keys.contains(&k) { - self.evicted_keys.remove(&k); - } + } + Message::Inserted(k) => { + debug_assert!( + !self.ordered_key_set.contains(&k), + "key must not already exist in the ordered set when inserting" + ); - self.ordered_key_set.insert(k); + // If the key has been evicted, but is now inserted, that means the key is no + // longer stale. + if self.evicted_keys.contains(&k) { + self.evicted_keys.remove(&k); } + + self.ordered_key_set.insert(k); } + } - true - }) + true } fn reposition_existing_key(&mut self, key: K) { @@ -169,12 +228,18 @@ impl> LruProcessingTask>, +} + /// An LRU eviction tracker. This is used to track the least recently used keys in the cache, and /// to evict keys when necessary. -#[derive(Debug, Clone)] +#[derive(Debug)] pub struct LruEvictionTracker { worker_message_sender: tokio::sync::mpsc::Sender>, - _worker: Arc>, + worker_state: Arc, } impl LruEvictionTracker { @@ -182,40 +247,69 @@ impl LruEvictionTracker { pub fn spawn>(deleter: D, channel_size: usize) -> Self { let (tx, rx) = tokio::sync::mpsc::channel(channel_size); + let worker_state = Arc::new(WorkerState { + active_deletions: DeletionIndicator::default(), + _worker: OnceLock::new(), + }); + let worker = LruProcessingTask::spawn_task(deleter, rx, worker_state.clone()); + worker_state._worker.set(worker).unwrap(); + Self { worker_message_sender: tx, - _worker: Arc::new(LruProcessingTask::spawn_task(deleter, rx)), + worker_state, } } /// Notify the LRU eviction tracker that the given key was inserted. /// /// You MUST call this method for every new key that is inserted into the cache. - pub async fn insert(&self, key: K) { - self.send_msg(Message::Inserted(key)).await; + pub fn insert(&self, key: K) { + self.send_msg(Message::Inserted(key)); } /// Notify the LRU eviction tracker that the given key was accessed. /// /// You MUST call this method for every read or update to a key. - pub async fn access(&self, key: K) { - self.send_msg(Message::Accessed(key)).await; + pub fn access(&self, key: K) { + self.send_msg(Message::Accessed(key)); } /// Cull the least recently used keys with the given deletion method. - pub async fn cull(&self, max_count: usize) { - self.send_msg(Message::Evict(max_count)).await; + pub fn cull(&self, max_count: u32) { + // Culling sets the top-most bit of the active deletion count to indicate that there are + // queued deletions. + self.worker_state.active_deletions.submit_batch(); + self.send_msg(Message::Evict(max_count)); + } + + /// Check whether there are culls that are already scheduled. + pub fn have_pending_culls(&self) -> bool { + self.worker_state.active_deletions.have_pending_batches() } /// Send a message to the worker. This is a helper method to reduce code duplication. 
- async fn send_msg(&self, message: Message) { + fn send_msg(&self, message: Message) { if self .worker_message_sender - .send(message.clone()) - .await + .blocking_send(message.clone()) .is_err() { unreachable!(); } } + + /// Get the total number of currently active deletions. + /// + /// Useful if you need to spinlock on this, for whatever reason. + /// + /// + /// # Note + /// + /// Does not guarantee ordering in respect to the deletion. The only guarantee this method + /// provides is whether any deletions are pending or not, but it is not safe to use this to + /// synchronize upon the results of your deletions, since the underlying atomic is used in a + /// relaxed manner. + pub fn have_active_deletions(&self) -> bool { + self.worker_state.active_deletions.have_pending_deletions() + } } diff --git a/lib/cache/fcache.rs b/lib/cache/fcache.rs index 521401b..ea1b8ee 100644 --- a/lib/cache/fcache.rs +++ b/lib/cache/fcache.rs @@ -35,36 +35,21 @@ impl FileCacheShared { self.root_path.join(fid.to_string()) } - /// Given an entry, remove it from the disk and update the size of the cache accordingly. + /// Given an entry, remove it from the disk. Note this does not update the cache size tracker. /// /// Gracefully handles the case where the file doesn't exist, which can happen if the file was /// deleted after we read the file ID from the map, but before we tried to delete /// the file. async fn delete_entry_from_disk_async(&self, entry: &CacheMapEntry) -> std::io::Result<()> { match tokio::fs::remove_file(self.path_for(entry.fid)).await { - Ok(()) => { - self.size_bytes - .fetch_sub(entry.size_bytes, std::sync::atomic::Ordering::Relaxed); - Ok(()) - } - Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()), - Err(e) => Err(e), - } - } - - fn delete_entry_from_disk_sync(&self, entry: &CacheMapEntry) -> std::io::Result<()> { - match std::fs::remove_file(self.path_for(entry.fid)) { - Ok(()) => { - self.size_bytes - .fetch_sub(entry.size_bytes, std::sync::atomic::Ordering::Relaxed); - Ok(()) - } + Ok(()) => Ok(()), Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()), Err(e) => Err(e), } } } +#[derive(Clone)] struct LruEntryDeleter { shared: Arc>, } @@ -77,16 +62,15 @@ impl LruEntryDeleter { impl Deleter for LruEntryDeleter { /// The LRU cache will call this method when it wants to evict keys. - fn delete<'a>(&mut self, keys: impl Iterator) - where - K: 'a, - { - for key in keys { - if let Some(entry) = self.shared.map.remove_sync(key) - && let Err(e) = self.shared.delete_entry_from_disk_sync(&entry.1) - { + async fn delete(&mut self, key: K) { + if let Some(entry) = self.shared.map.remove_sync(&key) { + if let Err(e) = self.shared.delete_entry_from_disk_async(&entry.1).await { error!(error = ?e, "failed to delete evicted cache entry from disk"); } + + self.shared + .size_bytes + .fetch_sub(entry.1.size_bytes, std::sync::atomic::Ordering::Relaxed); } } } @@ -155,7 +139,12 @@ impl FileCache { // How many cache entries to evict at most in a single batch. // // Not really sure how to determine this number. - const LRU_EVICTION_MAX_BATCH_SIZE: usize = 32; + const LRU_EVICTION_MAX_BATCH_SIZE: u32 = 8; + + // The maximum number of messages that can be buffered between this file cache and the LRU + // eviction tracker. If this is too small, then the file cache will be blocked waiting for the + // eviction tracker to catch up. 
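+    // Worked example: at 256, a burst of 256 access/insert notifications can queue up
+    // before a sender has to wait on the tracker.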
+ const MAX_LRU_TRACKER_CHANNEL_SIZE: usize = 256; // Dangerous: Changing this constant may cause the program to treat existing cache directories // as invalid, and thus provide a worse user experience. Do not change this unless you have a @@ -235,7 +224,7 @@ impl FileCache { max_size_bytes, lru_tracker: LruEvictionTracker::spawn( LruEntryDeleter::new(shared), - Self::LRU_EVICTION_MAX_BATCH_SIZE, + Self::MAX_LRU_TRACKER_CHANNEL_SIZE, ), }) } @@ -250,6 +239,21 @@ impl FileCache { } } +impl Drop for FileCache { + fn drop(&mut self) { + // Surprisingly, it's safe to delete files here. + // + // Note that the LRU tracker is running at the time of Drop. It, consequently, may end up + // calling the deleter, but that's fine, since the deleter ignores this problem gracefully. + // + // It is 100% safe to do this here, since it's guaranteed no concurrent task is borrowing + // the FileCache at this point. + if let Err(e) = std::fs::remove_dir_all(&self.shared.root_path) { + error!(error = ?e, "failed to delete cache directory on drop"); + } + } +} + impl AsyncReadableCache> for FileCache { @@ -296,7 +300,7 @@ impl AsyncReadableCache AsyncWritableCache, size_bytes: value.len(), }; - if self.shared.size() + new_entry.size_bytes > self.max_size_bytes { - // We need to evict some entries before we can insert this new entry. Let's just evict - // a batch of entries. - self.lru_tracker - .cull(Self::LRU_EVICTION_MAX_BATCH_SIZE) - .await; + while self.shared.size() + new_entry.size_bytes > self.max_size_bytes { + // TODO(markovejnovic): This whole spinlock situation sounded better in my head, but + // realistically, I think I could've gotten away with just a plain Notify. It is what + // it is now. + + // The cache doesn't have any space for this new entry, so we need to evict some + // entries before we can insert this new entry. + // Before we can do that, we need to make sure there are no other pending culls. If we + // let multiple culls happen at the same time, we risk draining a lot more entries from + // the cache than necessary, which results in a regression. + if self.lru_tracker.have_pending_culls() { + // If there are any pending culls, then we need to just wait. + tokio::task::yield_now().await; + continue; + } + + // There are no culls in progress, but the cache is still too full for this new entry, + // which means we need to evict some entries. + self.lru_tracker.cull(Self::LRU_EVICTION_MAX_BATCH_SIZE); } let mut new_file = self @@ -337,15 +354,14 @@ impl AsyncWritableCache, .await?; new_file.write_all(&value).await?; - self.shared - .size_bytes - .fetch_add(new_entry.size_bytes, std::sync::atomic::Ordering::Relaxed); + let mut size_delta: isize = new_entry.size_bytes as isize; // Now we insert the new file ID into the map, and get the old file ID if it exists. - if let Some(old_entry) = self.shared.map.upsert_async(*key, new_entry).await { + if let Some(old_entry) = self.shared.map.upsert_sync(*key, new_entry) { // If there was an old file ID, we need to delete the old file and notify the LRU // tracker that the key was accessed. - self.lru_tracker.access(*key).await; + self.lru_tracker.access(*key); + size_delta -= old_entry.size_bytes as isize; // TODO(markovejnovic): Could stack allocate the path. // Note that the file already may be deleted at this point -- the LRU tracker deleter @@ -353,7 +369,18 @@ impl AsyncWritableCache, // fine with us. 
self.shared.delete_entry_from_disk_async(&old_entry).await?; } else { - self.lru_tracker.insert(*key).await; + self.lru_tracker.insert(*key); + } + + if size_delta > 0 { + self.shared + .size_bytes + .fetch_add(size_delta as usize, std::sync::atomic::Ordering::Relaxed); + } else if size_delta < 0 { + self.shared.size_bytes.fetch_sub( + size_delta.abs() as usize, + std::sync::atomic::Ordering::Relaxed, + ); } // Epic, we did it. From e07d5be8080ba5f14ff7fdb31ca2f395360214ef Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 13 Feb 2026 17:12:15 -0800 Subject: [PATCH 08/33] deterministic clanker feedback --- lib/cache/eviction/lru.rs | 26 +++++++++++++------------- lib/cache/fcache.rs | 13 +++++++------ 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/lib/cache/eviction/lru.rs b/lib/cache/eviction/lru.rs index f821bc3..4890d82 100644 --- a/lib/cache/eviction/lru.rs +++ b/lib/cache/eviction/lru.rs @@ -4,7 +4,6 @@ use std::{ collections::HashSet, future::Future, hash::Hash, - pin::Pin, sync::{Arc, OnceLock, atomic::AtomicU64}, }; @@ -26,7 +25,7 @@ impl DeletionIndicator { /// Call to mark a batch as being processed right now. fn process_batch(&self, count: u32) { self.underlying.fetch_add( - count as u64 - (1 << 32), + u64::from(count) - (1 << 32), std::sync::atomic::Ordering::Relaxed, ); } @@ -66,7 +65,7 @@ pub trait Deleter: Send + Clone + 'static { } /// Messages sent to the LRU eviction tracker worker. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Copy)] enum Message { /// Notify the LRU eviction tracker that the given key was accessed. Accessed(K), @@ -182,6 +181,7 @@ impl> LruProcessingTask> LruProcessingTask> LruProcessingTask>, + worker: OnceLock>, } /// An LRU eviction tracker. This is used to track the least recently used keys in the cache, and @@ -249,10 +249,12 @@ impl LruEvictionTracker { let worker_state = Arc::new(WorkerState { active_deletions: DeletionIndicator::default(), - _worker: OnceLock::new(), + worker: OnceLock::new(), }); - let worker = LruProcessingTask::spawn_task(deleter, rx, worker_state.clone()); - worker_state._worker.set(worker).unwrap(); + let worker = LruProcessingTask::spawn_task(deleter, rx, Arc::clone(&worker_state)); + if worker_state.worker.set(worker).is_err() { + unreachable!("worker should only be set once, and we just set it"); + } Self { worker_message_sender: tx, @@ -283,17 +285,14 @@ impl LruEvictionTracker { } /// Check whether there are culls that are already scheduled. + #[must_use] pub fn have_pending_culls(&self) -> bool { self.worker_state.active_deletions.have_pending_batches() } /// Send a message to the worker. This is a helper method to reduce code duplication. fn send_msg(&self, message: Message) { - if self - .worker_message_sender - .blocking_send(message.clone()) - .is_err() - { + if self.worker_message_sender.blocking_send(message).is_err() { unreachable!(); } } @@ -309,6 +308,7 @@ impl LruEvictionTracker { /// provides is whether any deletions are pending or not, but it is not safe to use this to /// synchronize upon the results of your deletions, since the underlying atomic is used in a /// relaxed manner. 
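+    /// A hypothetical spin-wait usage (and, given the relaxed ordering, the reason this
+    /// is only a hint rather than a synchronization point):
+    ///
+    /// ```ignore
+    /// while tracker.have_active_deletions() {
+    ///     tokio::task::yield_now().await;
+    /// }
+    /// ```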
+ #[must_use] pub fn have_active_deletions(&self) -> bool { self.worker_state.active_deletions.have_pending_deletions() } diff --git a/lib/cache/fcache.rs b/lib/cache/fcache.rs index ea1b8ee..281d7d1 100644 --- a/lib/cache/fcache.rs +++ b/lib/cache/fcache.rs @@ -354,14 +354,14 @@ impl AsyncWritableCache, .await?; new_file.write_all(&value).await?; - let mut size_delta: isize = new_entry.size_bytes as isize; + let mut size_delta: isize = new_entry.size_bytes.cast_signed(); // Now we insert the new file ID into the map, and get the old file ID if it exists. if let Some(old_entry) = self.shared.map.upsert_sync(*key, new_entry) { // If there was an old file ID, we need to delete the old file and notify the LRU // tracker that the key was accessed. self.lru_tracker.access(*key); - size_delta -= old_entry.size_bytes as isize; + size_delta -= old_entry.size_bytes.cast_signed(); // TODO(markovejnovic): Could stack allocate the path. // Note that the file already may be deleted at this point -- the LRU tracker deleter @@ -373,12 +373,13 @@ impl AsyncWritableCache, } if size_delta > 0 { - self.shared - .size_bytes - .fetch_add(size_delta as usize, std::sync::atomic::Ordering::Relaxed); + self.shared.size_bytes.fetch_add( + size_delta.cast_unsigned(), + std::sync::atomic::Ordering::Relaxed, + ); } else if size_delta < 0 { self.shared.size_bytes.fetch_sub( - size_delta.abs() as usize, + size_delta.unsigned_abs(), std::sync::atomic::Ordering::Relaxed, ); } From b2c454a904a76a74ed12dfacf41dad0ca868331c Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Fri, 13 Feb 2026 18:22:34 -0800 Subject: [PATCH 09/33] more clanker comments --- lib/cache/eviction/lru.rs | 129 ++++++++++++++++++++++---------------- lib/cache/fcache.rs | 41 +++++++++--- 2 files changed, 108 insertions(+), 62 deletions(-) diff --git a/lib/cache/eviction/lru.rs b/lib/cache/eviction/lru.rs index 4890d82..fa0e269 100644 --- a/lib/cache/eviction/lru.rs +++ b/lib/cache/eviction/lru.rs @@ -1,18 +1,20 @@ //! Implements the LRU eviction policy. use std::{ - collections::HashSet, future::Future, hash::Hash, - sync::{Arc, OnceLock, atomic::AtomicU64}, + sync::{ + Arc, OnceLock, + atomic::{AtomicI64, AtomicUsize}, + }, }; -use hashlink::LinkedHashSet; +use hashlink::LinkedHashMap; use tokio::{sync::mpsc::Receiver, task::JoinHandle}; #[derive(Debug)] struct DeletionIndicator { - underlying: AtomicU64, + underlying: AtomicI64, } impl DeletionIndicator { @@ -25,7 +27,7 @@ impl DeletionIndicator { /// Call to mark a batch as being processed right now. fn process_batch(&self, count: u32) { self.underlying.fetch_add( - u64::from(count) - (1 << 32), + i64::from(count) - (1 << 32), std::sync::atomic::Ordering::Relaxed, ); } @@ -38,7 +40,7 @@ impl DeletionIndicator { /// Check if there are any scheduled or active deletions. fn have_pending_deletions(&self) -> bool { - self.underlying.load(std::sync::atomic::Ordering::Relaxed) == 0 + self.underlying.load(std::sync::atomic::Ordering::Relaxed) != 0 } /// Check if there are any scheduled batches of deletions. @@ -50,41 +52,37 @@ impl DeletionIndicator { impl Default for DeletionIndicator { fn default() -> Self { Self { - underlying: AtomicU64::new(0), + underlying: AtomicI64::new(0), } } } /// A trait for deleting keys from the cache. This is used by the LRU eviction tracker to delete /// keys when they are evicted. -pub trait Deleter: Send + Clone + 'static { +pub trait Deleter: Send + Clone + 'static { /// Delete the given keys from the cache. The keys are guaranteed to be in the order of /// eviction. 
You absolutely MUST delete the keys in the order they are given, otherwise the /// LRU eviction tracker will get very confused and break. - fn delete(&mut self, key: K) -> impl Future + Send; + fn delete(&mut self, key: K, ctx: Ctx) -> impl Future + Send; } /// Messages sent to the LRU eviction tracker worker. #[derive(Debug, Clone, Copy)] -enum Message { +enum Message { /// Notify the LRU eviction tracker that the given key was accessed. - Accessed(K), + Accessed(K, usize), /// Request an eviction set of the given size. Evict(u32), /// Notify the LRU eviction tracker that a given key was inserted. - Inserted(K), + Inserted(K, C), } #[derive(Debug)] -struct LruProcessingTask> { - receiver: Receiver>, +struct LruProcessingTask> { + receiver: Receiver>, /// The ordered set of keys, ordered according to the last-used policy. - ordered_key_set: LinkedHashSet, - - /// Tracks which keys are currently considered evicted. Read the long explanation in - /// `service_message` for why this is necessary. - evicted_keys: HashSet, + ordered_key_map: LinkedHashMap, /// The deleter to call when we need to evict keys. deleter: D, @@ -93,12 +91,13 @@ struct LruProcessingTask> { shared: Arc, } -impl> LruProcessingTask { - fn new(deleter: D, receiver: Receiver>, shared: Arc) -> Self { +impl> + LruProcessingTask +{ + fn new(deleter: D, receiver: Receiver>, shared: Arc) -> Self { Self { receiver, - ordered_key_set: LinkedHashSet::new(), - evicted_keys: HashSet::new(), + ordered_key_map: LinkedHashMap::new(), deleter, shared, } @@ -106,7 +105,7 @@ impl> LruProcessingTask>, + receiver: Receiver>, shared: Arc, ) -> JoinHandle<()> { // TODO(markovejnovic): This should have a best-effort drop. @@ -124,18 +123,32 @@ impl> LruProcessingTask) -> bool { + fn service_message(&mut self, message: Message) -> bool { match message { - Message::Accessed(k) => { - if self.evicted_keys.contains(&k) { - // This is a ghost access, happens when a client accesses a key that may not - // have been cleaned up yet. Just ignore it. + Message::Accessed(k, ev_gen) => { + if ev_gen + < self + .shared + .eviction_generation + .load(std::sync::atomic::Ordering::Relaxed) + { + // This is a ghost access. Happens when one client sends an eviction, but other + // clients add accesses to the same keys, before our worker thread had a chance + // to clean up. + // + // We're looking at an access in the "future". return true; } self.reposition_existing_key(k); } Message::Evict(max_count) => { + // We got an eviction notice. Bump the eviction generation to indicate that all + // accesses after this point are considered in the "future". Read the subsequent + // comment for an explanation of why we need to do this. + self.shared + .eviction_generation + .fetch_add(1, std::sync::atomic::Ordering::Relaxed); // Before we send off the eviction set, we actually need to remove the evicted // keys from the pending queue. Read the following for an explanation. // @@ -175,6 +188,15 @@ impl> LruProcessingTask> LruProcessingTask { + Message::Inserted(k, ctx) => { debug_assert!( - !self.ordered_key_set.contains(&k), + !self.ordered_key_map.contains_key(&k), "key must not already exist in the ordered set when inserting" ); - // If the key has been evicted, but is now inserted, that means the key is no - // longer stale. - if self.evicted_keys.contains(&k) { - self.evicted_keys.remove(&k); - } - - self.ordered_key_set.insert(k); + self.ordered_key_map.insert(k, ctx); } } @@ -219,36 +234,39 @@ impl> LruProcessingTask>, } /// An LRU eviction tracker. 
This is used to track the least recently used keys in the cache, and /// to evict keys when necessary. #[derive(Debug)] -pub struct LruEvictionTracker { - worker_message_sender: tokio::sync::mpsc::Sender>, +pub struct LruEvictionTracker { + worker_message_sender: tokio::sync::mpsc::Sender>, worker_state: Arc, } -impl LruEvictionTracker { +impl LruEvictionTracker { /// Spawn a new LRU eviction tracker with the given deleter and channel size. - pub fn spawn>(deleter: D, channel_size: usize) -> Self { + pub fn spawn>(deleter: D, channel_size: usize) -> Self { let (tx, rx) = tokio::sync::mpsc::channel(channel_size); let worker_state = Arc::new(WorkerState { active_deletions: DeletionIndicator::default(), + eviction_generation: AtomicUsize::new(0), worker: OnceLock::new(), }); let worker = LruProcessingTask::spawn_task(deleter, rx, Arc::clone(&worker_state)); @@ -265,15 +283,20 @@ impl LruEvictionTracker { /// Notify the LRU eviction tracker that the given key was inserted. /// /// You MUST call this method for every new key that is inserted into the cache. - pub fn insert(&self, key: K) { - self.send_msg(Message::Inserted(key)); + pub fn insert(&self, key: K, ctx: C) { + self.send_msg(Message::Inserted(key, ctx)); } /// Notify the LRU eviction tracker that the given key was accessed. /// /// You MUST call this method for every read or update to a key. pub fn access(&self, key: K) { - self.send_msg(Message::Accessed(key)); + self.send_msg(Message::Accessed( + key, + self.worker_state + .eviction_generation + .load(std::sync::atomic::Ordering::Relaxed), + )); } /// Cull the least recently used keys with the given deletion method. @@ -291,7 +314,7 @@ impl LruEvictionTracker { } /// Send a message to the worker. This is a helper method to reduce code duplication. - fn send_msg(&self, message: Message) { + fn send_msg(&self, message: Message) { if self.worker_message_sender.blocking_send(message).is_err() { unreachable!(); } diff --git a/lib/cache/fcache.rs b/lib/cache/fcache.rs index 281d7d1..b002ec1 100644 --- a/lib/cache/fcache.rs +++ b/lib/cache/fcache.rs @@ -49,6 +49,11 @@ impl FileCacheShared { } } +#[derive(Debug, Clone)] +struct DeleterCtx { + fid: usize, +} + #[derive(Clone)] struct LruEntryDeleter { shared: Arc>, @@ -60,10 +65,15 @@ impl LruEntryDeleter { } } -impl Deleter for LruEntryDeleter { +impl Deleter for LruEntryDeleter { /// The LRU cache will call this method when it wants to evict keys. - async fn delete(&mut self, key: K) { - if let Some(entry) = self.shared.map.remove_sync(&key) { + async fn delete(&mut self, key: K, ctx: DeleterCtx) { + if let Some(entry) = self + .shared + .map + .remove_if_async(&key, |m| m.fid == ctx.fid) + .await + { if let Err(e) = self.shared.delete_entry_from_disk_async(&entry.1).await { error!(error = ?e, "failed to delete evicted cache entry from disk"); } @@ -132,7 +142,7 @@ pub struct FileCache { /// LRU eviction tracker. This is used to track the least recently used keys in the cache, and /// to evict keys when necessary. - lru_tracker: LruEvictionTracker, + lru_tracker: LruEvictionTracker, } impl FileCache { @@ -315,8 +325,8 @@ impl AsyncReadableCache AsyncWritableCache, CacheWriteError> - for FileCache +impl + AsyncWritableCache, CacheWriteError> for FileCache { async fn insert(&self, key: &K, value: Vec) -> Result<(), CacheWriteError> { // Inserts are tricky. 
The first thing we'll do is find a new file handle for the new @@ -329,6 +339,14 @@ impl AsyncWritableCache, }; while self.shared.size() + new_entry.size_bytes > self.max_size_bytes { + if self.shared.size() == 0 { + // This means that the new entry is larger than the entire cache size limit. In + // this case, we have no choice but to just insert the entry and let the cache + // temporarily exceed the size limit. We will try to evict entries as soon as + // possible, but in the meantime, we need to at least insert this entry. + break; + } + // TODO(markovejnovic): This whole spinlock situation sounded better in my head, but // realistically, I think I could've gotten away with just a plain Notify. It is what // it is now. @@ -340,6 +358,8 @@ impl AsyncWritableCache, // the cache than necessary, which results in a regression. if self.lru_tracker.have_pending_culls() { // If there are any pending culls, then we need to just wait. + // TODO(markovejnovic): This could be nicer and maybe starve the CPU less. Chances + // are we actually need to yield to the LRU tracker task. tokio::task::yield_now().await; continue; } @@ -357,7 +377,7 @@ impl AsyncWritableCache, let mut size_delta: isize = new_entry.size_bytes.cast_signed(); // Now we insert the new file ID into the map, and get the old file ID if it exists. - if let Some(old_entry) = self.shared.map.upsert_sync(*key, new_entry) { + if let Some(old_entry) = self.shared.map.upsert_async(*key, new_entry).await { // If there was an old file ID, we need to delete the old file and notify the LRU // tracker that the key was accessed. self.lru_tracker.access(*key); @@ -367,9 +387,12 @@ impl AsyncWritableCache, // Note that the file already may be deleted at this point -- the LRU tracker deleter // may have already deleted this file, so if the file doesn't exist, that's already // fine with us. - self.shared.delete_entry_from_disk_async(&old_entry).await?; + if let Err(e) = self.shared.delete_entry_from_disk_async(&old_entry).await { + error!(error = ?e, key = ?key, "failed to delete old cache entry from disk"); + } } else { - self.lru_tracker.insert(*key); + self.lru_tracker + .insert(*key, DeleterCtx { fid: new_entry.fid }); } if size_delta > 0 { From 8379ebb867e45e97e058e56780b913888e730841 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sun, 15 Feb 2026 19:10:21 -0800 Subject: [PATCH 10/33] Fix edge-case bugs --- lib/cache/eviction/lru.rs | 157 ++++++++++++++++++++++---------------- lib/cache/fcache.rs | 42 +++++++--- 2 files changed, 124 insertions(+), 75 deletions(-) diff --git a/lib/cache/eviction/lru.rs b/lib/cache/eviction/lru.rs index fa0e269..e8f3917 100644 --- a/lib/cache/eviction/lru.rs +++ b/lib/cache/eviction/lru.rs @@ -12,19 +12,67 @@ use std::{ use hashlink::LinkedHashMap; use tokio::{sync::mpsc::Receiver, task::JoinHandle}; +/// A trait for deleting keys from the cache. This is used by the LRU eviction tracker to delete +/// keys when they are evicted. +pub trait Deleter: Send + Clone + 'static { + /// Delete the given keys from the cache. The keys are guaranteed to be in the order of + /// eviction. You absolutely MUST delete the keys in the order they are given, otherwise the + /// LRU eviction tracker will get very confused and break. + fn delete(&mut self, key: K, ctx: Ctx) -> impl Future + Send; +} + +/// Messages sent to the LRU eviction tracker worker. +#[derive(Debug, Clone, Copy)] +enum Message { + /// Notify the LRU eviction tracker that the given key was accessed. 
+ Accessed(K, usize), + /// Request an eviction set of the given size. + Evict(u32), + /// Notify the LRU eviction tracker that a given key was inserted. + Inserted(K, C), +} + +/// Tracks in-flight eviction batches and individual deletions using a single packed `AtomicI64`. +/// +/// Layout: `[upper 32 bits: pending batches | lower 32 bits: active deletions]` +/// +/// The lifecycle of an eviction is: +/// 1. Producer calls `submit_batch()` — increments upper half by 1. +/// 2. Producer enqueues the `Evict` message into the channel. +/// 3. Worker receives the message and calls `process_batch(n)` — decrements upper half by 1 and +/// increments lower half by `n` (the actual number of keys to delete). +/// 4. Each spawned deletion task calls `observe_deletion()` when done — decrements lower half by 1. +/// +/// The indicator reads zero only when no batches are pending and no deletions are in flight. #[derive(Debug)] struct DeletionIndicator { underlying: AtomicI64, } impl DeletionIndicator { - /// Call to mark the start of a batch of deletions. + fn new() -> Self { + Self { + underlying: AtomicI64::new(0), + } + } + + /// Mark that a new eviction batch has been submitted by a producer. Called on the producer + /// side *before* the `Evict` message is sent into the channel. fn submit_batch(&self) { self.underlying .fetch_add(1 << 32, std::sync::atomic::Ordering::Relaxed); } - /// Call to mark a batch as being processed right now. + /// Undo a `submit_batch` call. Used when the channel `try_send` fails after we already + /// incremented the pending-batch counter. + fn undo_submit_batch(&self) { + self.underlying + .fetch_sub(1 << 32, std::sync::atomic::Ordering::Relaxed); + } + + /// Called by the worker when it begins processing an eviction batch. Atomically decrements + /// the pending-batch counter (upper half) by 1 and increments the active-deletion counter + /// (lower half) by `count`. fn process_batch(&self, count: u32) { self.underlying.fetch_add( i64::from(count) - (1 << 32), @@ -32,51 +80,19 @@ impl DeletionIndicator { ); } - /// Call to mark a deletion as being completed. + /// Called by a spawned deletion task when it finishes deleting one key. fn observe_deletion(&self) { self.underlying .fetch_sub(1, std::sync::atomic::Ordering::Relaxed); } - /// Check if there are any scheduled or active deletions. - fn have_pending_deletions(&self) -> bool { - self.underlying.load(std::sync::atomic::Ordering::Relaxed) != 0 - } - - /// Check if there are any scheduled batches of deletions. + /// Returns `true` if there are any pending batches (submitted but not yet processed by the + /// worker). Used by producers to avoid enqueuing redundant eviction requests. fn have_pending_batches(&self) -> bool { self.underlying.load(std::sync::atomic::Ordering::Relaxed) >= 1 << 32 } } -impl Default for DeletionIndicator { - fn default() -> Self { - Self { - underlying: AtomicI64::new(0), - } - } -} - -/// A trait for deleting keys from the cache. This is used by the LRU eviction tracker to delete -/// keys when they are evicted. -pub trait Deleter: Send + Clone + 'static { - /// Delete the given keys from the cache. The keys are guaranteed to be in the order of - /// eviction. You absolutely MUST delete the keys in the order they are given, otherwise the - /// LRU eviction tracker will get very confused and break. - fn delete(&mut self, key: K, ctx: Ctx) -> impl Future + Send; -} - -/// Messages sent to the LRU eviction tracker worker. 
-#[derive(Debug, Clone, Copy)] -enum Message { - /// Notify the LRU eviction tracker that the given key was accessed. - Accessed(K, usize), - /// Request an eviction set of the given size. - Evict(u32), - /// Notify the LRU eviction tracker that a given key was inserted. - Inserted(K, C), -} - #[derive(Debug)] struct LruProcessingTask> { receiver: Receiver>, @@ -205,7 +221,13 @@ impl> // within a u32, min(MAX_U32, MAX_USIZE) == MAX_U32. #[expect(clippy::cast_possible_truncation)] let take_count = self.ordered_key_map.len().min(max_count as usize) as u32; + + // Atomically transition this batch from "pending" to "active deletions". + // This decrements the upper half (pending batches) by 1 and increments the + // lower half (active deletions) by take_count. If take_count is 0, this + // still correctly clears the pending batch. self.shared.active_deletions.process_batch(take_count); + for _ in 0..take_count { let Some(e) = self.ordered_key_map.pop_front() else { break; @@ -246,6 +268,8 @@ impl> #[derive(Debug)] struct WorkerState { + /// Packed atomic tracking pending eviction batches (upper 32 bits) and active deletions + /// (lower 32 bits). See `DeletionIndicator` for the full protocol. active_deletions: DeletionIndicator, eviction_generation: AtomicUsize, worker: OnceLock>, @@ -265,7 +289,7 @@ impl LruEvictionTracker let (tx, rx) = tokio::sync::mpsc::channel(channel_size); let worker_state = Arc::new(WorkerState { - active_deletions: DeletionIndicator::default(), + active_deletions: DeletionIndicator::new(), eviction_generation: AtomicUsize::new(0), worker: OnceLock::new(), }); @@ -283,56 +307,57 @@ impl LruEvictionTracker /// Notify the LRU eviction tracker that the given key was inserted. /// /// You MUST call this method for every new key that is inserted into the cache. - pub fn insert(&self, key: K, ctx: C) { - self.send_msg(Message::Inserted(key, ctx)); + pub async fn insert(&self, key: K, ctx: C) { + self.send_msg(Message::Inserted(key, ctx)).await; } /// Notify the LRU eviction tracker that the given key was accessed. /// /// You MUST call this method for every read or update to a key. - pub fn access(&self, key: K) { + pub async fn access(&self, key: K) { self.send_msg(Message::Accessed( key, self.worker_state .eviction_generation .load(std::sync::atomic::Ordering::Relaxed), - )); + )) + .await; } - /// Cull the least recently used keys with the given deletion method. - pub fn cull(&self, max_count: u32) { - // Culling sets the top-most bit of the active deletion count to indicate that there are - // queued deletions. + /// Try to cull the least recently used keys with the given deletion method. + /// + /// Returns `true` if the eviction message was successfully enqueued, or `false` if the + /// channel is full. In the latter case, the caller should yield and retry. + /// + /// The ordering here is critical for correctness: we `submit_batch()` *before* `try_send()` + /// so that the `DeletionIndicator` is always in a state where `have_pending_batches()` returns + /// `true` by the time anyone observes the enqueued message. If the send fails, we roll back + /// with `undo_submit_batch()`, which is a net-zero delta on the packed atomic. 
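+    ///
+    /// A worked trace with illustrative values (each line shows the packed atomic after
+    /// the call):
+    ///
+    /// ```text
+    /// submit_batch()          0x0000_0001_0000_0000   // 1 batch pending, 0 deletions
+    /// process_batch(3)        0x0000_0000_0000_0003   // 0 batches pending, 3 in flight
+    /// observe_deletion() x3   0x0000_0000_0000_0000   // idle again
+    /// ```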
+ #[must_use] + pub fn try_cull(&self, max_count: u32) -> bool { self.worker_state.active_deletions.submit_batch(); - self.send_msg(Message::Evict(max_count)); + if self + .worker_message_sender + .try_send(Message::Evict(max_count)) + .is_ok() + { + true + } else { + self.worker_state.active_deletions.undo_submit_batch(); + false + } } - /// Check whether there are culls that are already scheduled. + /// Check whether there are culls that are already scheduled or actively in progress. #[must_use] pub fn have_pending_culls(&self) -> bool { self.worker_state.active_deletions.have_pending_batches() } /// Send a message to the worker. This is a helper method to reduce code duplication. - fn send_msg(&self, message: Message) { - if self.worker_message_sender.blocking_send(message).is_err() { + async fn send_msg(&self, message: Message) { + if self.worker_message_sender.send(message).await.is_err() { unreachable!(); } } - - /// Get the total number of currently active deletions. - /// - /// Useful if you need to spinlock on this, for whatever reason. - /// - /// - /// # Note - /// - /// Does not guarantee ordering in respect to the deletion. The only guarantee this method - /// provides is whether any deletions are pending or not, but it is not safe to use this to - /// synchronize upon the results of your deletions, since the underlying atomic is used in a - /// relaxed manner. - #[must_use] - pub fn have_active_deletions(&self) -> bool { - self.worker_state.active_deletions.have_pending_deletions() - } } diff --git a/lib/cache/fcache.rs b/lib/cache/fcache.rs index b002ec1..13b35fa 100644 --- a/lib/cache/fcache.rs +++ b/lib/cache/fcache.rs @@ -20,6 +20,23 @@ use thiserror::Error; use tokio::io::{AsyncReadExt as _, AsyncWriteExt as _}; +/// Drop guard that deletes an unregistered cache file if the insert task is cancelled or fails +/// between file creation and map registration. Defused once the file is registered in the map. +struct FileGuard { + path: Option, +} + +impl Drop for FileGuard { + fn drop(&mut self) { + if let Some(path) = self.path.take() + && let Err(e) = std::fs::remove_file(&path) + && e.kind() != std::io::ErrorKind::NotFound + { + error!(error = ?e, path = ?path, "failed to clean up orphaned cache file"); + } + } +} + struct FileCacheShared { root_path: PathBuf, map: scc::HashMap, @@ -310,7 +327,7 @@ impl AsyncReadableCache // There are no culls in progress, but the cache is still too full for this new entry, // which means we need to evict some entries. - self.lru_tracker.cull(Self::LRU_EVICTION_MAX_BATCH_SIZE); + if !self.lru_tracker.try_cull(Self::LRU_EVICTION_MAX_BATCH_SIZE) { + tokio::task::yield_now().await; + } } - let mut new_file = self - .create_file(&self.shared.path_for(new_entry.fid)) - .await?; + let path = self.shared.path_for(new_entry.fid); + let mut new_file = self.create_file(&path).await?; + let mut guard = FileGuard { path: Some(path) }; new_file.write_all(&value).await?; let mut size_delta: isize = new_entry.size_bytes.cast_signed(); - // Now we insert the new file ID into the map, and get the old file ID if it exists. - if let Some(old_entry) = self.shared.map.upsert_async(*key, new_entry).await { + // Register the file in the map. After this point, the map owns the file and the guard + // must not delete it. 
+ let old_entry = self.shared.map.upsert_async(*key, new_entry).await; + guard.path = None; + + if let Some(old_entry) = old_entry { // If there was an old file ID, we need to delete the old file and notify the LRU // tracker that the key was accessed. - self.lru_tracker.access(*key); + self.lru_tracker.access(*key).await; size_delta -= old_entry.size_bytes.cast_signed(); // TODO(markovejnovic): Could stack allocate the path. @@ -392,7 +415,8 @@ impl } } else { self.lru_tracker - .insert(*key, DeleterCtx { fid: new_entry.fid }); + .insert(*key, DeleterCtx { fid: new_entry.fid }) + .await; } if size_delta > 0 { From c741f6c4cca11ad05b89d1d1091844b6bb8f04de Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sun, 15 Feb 2026 20:05:45 -0800 Subject: [PATCH 11/33] Fix upsert-to-LRU non-atomicity causing permanently un-evictable entries The upsert_async + LRU notification sequence was not atomic: concurrent inserts or evictions could interleave between the HashMap mutation and the LRU tracker message, causing stale DeleterCtx{fid} values that make remove_if_async silently fail and leave entries permanently un-evictable. Fix by switching to entry_async (holds the bucket lock while allocating a global monotonic version), then using a sync try_send-based upsert() for the LRU notification so it cannot be lost to task cancellation. Key changes: - Add Versioned trait to lru.rs for version-based message deduplication - Replace Message::Inserted with Message::Upserted; worker deduplicates by comparing versions and gracefully handles missing keys on Accessed - Replace async insert() with sync upsert() using try_send + spawn fallback, making the LRU notification non-cancellable - FileCache::insert uses entry_async to atomically read old entry and allocate version under the bucket lock; all post-lock operations (guard defuse, LRU upsert, size accounting) are synchronous - Add check-cfg for loom and wire up the sync shim module to enable future loom-based concurrency testing --- Cargo.lock | 35 ++++++++++ Cargo.toml | 4 ++ lib/cache/eviction/lru.rs | 127 +++++++++++++++++++----------------- lib/cache/fcache.rs | 132 +++++++++++++++++++++----------------- lib/lib.rs | 1 + lib/sync/atomic.rs | 13 ++++ lib/sync/mod.rs | 67 +++++++++++++++++++ 7 files changed, 259 insertions(+), 120 deletions(-) create mode 100644 lib/sync/atomic.rs create mode 100644 lib/sync/mod.rs diff --git a/Cargo.lock b/Cargo.lock index ebbe571..7cbaf5b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -718,6 +718,21 @@ dependencies = [ "thread_local", ] +[[package]] +name = "generator" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f04ae4152da20c76fe800fa48659201d5cf627c5149ca0b707b69d7eef6cf9" +dependencies = [ + "cc", + "cfg-if", + "libc", + "log", + "rustversion", + "windows-link", + "windows-result", +] + [[package]] name = "getrandom" version = "0.2.17" @@ -777,6 +792,7 @@ dependencies = [ "http", "inquire", "libc", + "loom", "mesa-dev", "nix", "num-traits", @@ -1304,6 +1320,19 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "loom" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + [[package]] name = "lru-slab" version = "0.1.2" @@ 
-2119,6 +2148,12 @@ dependencies = [ "serde_json", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.2.0" diff --git a/Cargo.toml b/Cargo.toml index b6fe250..6f777d9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,9 @@ opentelemetry-otlp = { version = "0.29", default-features = false, features = [" tracing-opentelemetry = { version = "0.30" } hashlink = "0.11.0" +[target.'cfg(loom)'.dependencies] +loom = "0.7" + [features] default = [] staging = [] @@ -60,6 +63,7 @@ staging = [] vergen-gitcl = { version = "1", features = [] } [lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(loom)'] } unsafe_code = "allow" missing_docs = "warn" unreachable_pub = "allow" diff --git a/lib/cache/eviction/lru.rs b/lib/cache/eviction/lru.rs index e8f3917..7a33cde 100644 --- a/lib/cache/eviction/lru.rs +++ b/lib/cache/eviction/lru.rs @@ -1,16 +1,20 @@ //! Implements the LRU eviction policy. -use std::{ - future::Future, - hash::Hash, - sync::{ - Arc, OnceLock, - atomic::{AtomicI64, AtomicUsize}, - }, -}; +use std::{future::Future, hash::Hash}; + +use crate::sync::{Arc, OnceLock, atomic, atomic::AtomicI64, atomic::AtomicUsize}; use hashlink::LinkedHashMap; -use tokio::{sync::mpsc::Receiver, task::JoinHandle}; +use tokio::{ + sync::mpsc::{Receiver, error::TrySendError}, + task::JoinHandle, +}; + +/// Types that carry a monotonic version for deduplication of out-of-order messages. +pub trait Versioned { + /// Returns the monotonic version of this value. + fn version(&self) -> u64; +} /// A trait for deleting keys from the cache. This is used by the LRU eviction tracker to delete /// keys when they are evicted. @@ -28,8 +32,8 @@ enum Message { Accessed(K, usize), /// Request an eviction set of the given size. Evict(u32), - /// Notify the LRU eviction tracker that a given key was inserted. - Inserted(K, C), + /// Notify the LRU eviction tracker that a key was inserted or overwritten. + Upserted(K, C), } /// Tracks in-flight eviction batches and individual deletions using a single packed `AtomicI64`. @@ -60,36 +64,33 @@ impl DeletionIndicator { /// side *before* the `Evict` message is sent into the channel. fn submit_batch(&self) { self.underlying - .fetch_add(1 << 32, std::sync::atomic::Ordering::Relaxed); + .fetch_add(1 << 32, atomic::Ordering::Relaxed); } /// Undo a `submit_batch` call. Used when the channel `try_send` fails after we already /// incremented the pending-batch counter. fn undo_submit_batch(&self) { self.underlying - .fetch_sub(1 << 32, std::sync::atomic::Ordering::Relaxed); + .fetch_sub(1 << 32, atomic::Ordering::Relaxed); } /// Called by the worker when it begins processing an eviction batch. Atomically decrements /// the pending-batch counter (upper half) by 1 and increments the active-deletion counter /// (lower half) by `count`. fn process_batch(&self, count: u32) { - self.underlying.fetch_add( - i64::from(count) - (1 << 32), - std::sync::atomic::Ordering::Relaxed, - ); + self.underlying + .fetch_add(i64::from(count) - (1 << 32), atomic::Ordering::Relaxed); } /// Called by a spawned deletion task when it finishes deleting one key. 
fn observe_deletion(&self) { - self.underlying - .fetch_sub(1, std::sync::atomic::Ordering::Relaxed); + self.underlying.fetch_sub(1, atomic::Ordering::Relaxed); } /// Returns `true` if there are any pending batches (submitted but not yet processed by the /// worker). Used by producers to avoid enqueuing redundant eviction requests. fn have_pending_batches(&self) -> bool { - self.underlying.load(std::sync::atomic::Ordering::Relaxed) >= 1 << 32 + self.underlying.load(atomic::Ordering::Relaxed) >= 1 << 32 } } @@ -107,7 +108,7 @@ struct LruProcessingTask> { shared: Arc, } -impl> +impl> LruProcessingTask { fn new(deleter: D, receiver: Receiver>, shared: Arc) -> Self { @@ -146,17 +147,18 @@ impl> < self .shared .eviction_generation - .load(std::sync::atomic::Ordering::Relaxed) + .load(atomic::Ordering::Relaxed) { - // This is a ghost access. Happens when one client sends an eviction, but other - // clients add accesses to the same keys, before our worker thread had a chance - // to clean up. - // - // We're looking at an access in the "future". + // This is a ghost access. Happens when one client sends an eviction, but + // other clients add accesses to the same keys, before our worker thread had + // a chance to clean up. return true; } - self.reposition_existing_key(k); + // The key may have been evicted between the access and this message arriving. + if let Some(entry) = self.ordered_key_map.remove(&k) { + self.ordered_key_map.insert(k, entry); + } } Message::Evict(max_count) => { // We got an eviction notice. Bump the eviction generation to indicate that all @@ -164,7 +166,7 @@ impl> // comment for an explanation of why we need to do this. self.shared .eviction_generation - .fetch_add(1, std::sync::atomic::Ordering::Relaxed); + .fetch_add(1, atomic::Ordering::Relaxed); // Before we send off the eviction set, we actually need to remove the evicted // keys from the pending queue. Read the following for an explanation. // @@ -204,12 +206,12 @@ impl> // so the best we can do is simply mark the stupid keys as "evicted" and ignore // any accesses from these keys until they're marked as "inserted" again. // - // This is what the generation counter does -- it allows us to track which messages - // are considered "stale". There are some problems with the generation counter, - // however. We drop all messages that are "generationally old". Under high - // contention, this results in the eviction policy acting like a fuzzy LRU policy, - // rather than a strict LRU policy, but for the purposes of this cache, this is an - // acceptable tradeoff. + // This is what the generation counter does -- it allows us to track which + // messages are considered "stale". There are some problems with the generation + // counter, however. We drop all messages that are "generationally old". Under + // high contention, this results in the eviction policy acting like a fuzzy LRU + // policy, rather than a strict LRU policy, but for the purposes of this cache, + // this is an acceptable tradeoff. // // TODO(markovejnovic): Make this a strict LRU. 
// @@ -229,41 +231,32 @@ impl> self.shared.active_deletions.process_batch(take_count); for _ in 0..take_count { - let Some(e) = self.ordered_key_map.pop_front() else { + let Some((key, ctx)) = self.ordered_key_map.pop_front() else { break; }; let mut deleter = self.deleter.clone(); let shared_clone = Arc::clone(&self.shared); tokio::spawn(async move { - deleter.delete(e.0, e.1).await; + deleter.delete(key, ctx).await; shared_clone.active_deletions.observe_deletion(); }); } } } - Message::Inserted(k, ctx) => { - debug_assert!( - !self.ordered_key_map.contains_key(&k), - "key must not already exist in the ordered set when inserting" - ); - + Message::Upserted(k, ctx) => { + if let Some(existing) = self.ordered_key_map.get(&k) + && ctx.version() < existing.version() + { + // Stale message from a previous incarnation; drop it. + return true; + } + self.ordered_key_map.remove(&k); self.ordered_key_map.insert(k, ctx); } } true } - - fn reposition_existing_key(&mut self, key: K) { - debug_assert!( - self.ordered_key_map.contains_key(&key), - "key must exist in the ordered set before repositioning" - ); - - if let Some(c) = self.ordered_key_map.remove(&key) { - self.ordered_key_map.insert(key, c); - } - } } #[derive(Debug)] @@ -283,7 +276,9 @@ pub struct LruEvictionTracker { worker_state: Arc, } -impl LruEvictionTracker { +impl + LruEvictionTracker +{ /// Spawn a new LRU eviction tracker with the given deleter and channel size. pub fn spawn>(deleter: D, channel_size: usize) -> Self { let (tx, rx) = tokio::sync::mpsc::channel(channel_size); @@ -304,11 +299,23 @@ impl LruEvictionTracker } } - /// Notify the LRU eviction tracker that the given key was inserted. + /// Notify the LRU tracker that a key was inserted or overwritten. /// - /// You MUST call this method for every new key that is inserted into the cache. - pub async fn insert(&self, key: K, ctx: C) { - self.send_msg(Message::Inserted(key, ctx)).await; + /// Non-cancellable: uses `try_send` (sync) with a `tokio::spawn` fallback so that the + /// notification cannot be lost to task cancellation. + pub fn upsert(&self, key: K, ctx: C) { + match self + .worker_message_sender + .try_send(Message::Upserted(key, ctx)) + { + Ok(()) | Err(TrySendError::Closed(_)) => {} + Err(TrySendError::Full(msg)) => { + let sender = self.worker_message_sender.clone(); + tokio::spawn(async move { + let _ = sender.send(msg).await; + }); + } + } } /// Notify the LRU eviction tracker that the given key was accessed. 
@@ -319,7 +326,7 @@ impl LruEvictionTracker key, self.worker_state .eviction_generation - .load(std::sync::atomic::Ordering::Relaxed), + .load(atomic::Ordering::Relaxed), )) .await; } diff --git a/lib/cache/fcache.rs b/lib/cache/fcache.rs index 13b35fa..2cfb58a 100644 --- a/lib/cache/fcache.rs +++ b/lib/cache/fcache.rs @@ -4,14 +4,15 @@ use std::{ fmt::Debug, hash::Hash, path::{Path, PathBuf}, - sync::{Arc, atomic::AtomicUsize}, }; +use crate::sync::{Arc, atomic, atomic::AtomicU64, atomic::AtomicUsize}; + use tracing::error; use crate::{ cache::{ - eviction::lru::{Deleter, LruEvictionTracker}, + eviction::lru::{Deleter, LruEvictionTracker, Versioned}, traits::{AsyncReadableCache, AsyncWritableCache}, }, io, @@ -45,7 +46,7 @@ struct FileCacheShared { impl FileCacheShared { fn size(&self) -> usize { - self.size_bytes.load(std::sync::atomic::Ordering::Relaxed) + self.size_bytes.load(atomic::Ordering::Relaxed) } fn path_for(&self, fid: usize) -> PathBuf { @@ -66,9 +67,16 @@ impl FileCacheShared { } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Copy)] struct DeleterCtx { fid: usize, + version: u64, +} + +impl Versioned for DeleterCtx { + fn version(&self) -> u64 { + self.version + } } #[derive(Clone)] @@ -97,7 +105,7 @@ impl Deleter for Lru self.shared .size_bytes - .fetch_sub(entry.1.size_bytes, std::sync::atomic::Ordering::Relaxed); + .fetch_sub(entry.1.size_bytes, atomic::Ordering::Relaxed); } } } @@ -160,6 +168,10 @@ pub struct FileCache { /// LRU eviction tracker. This is used to track the least recently used keys in the cache, and /// to evict keys when necessary. lru_tracker: LruEvictionTracker, + + /// Global monotonic counter for per-key versioning. Incremented under the scc bucket lock + /// to guarantee that versions are strictly monotonically increasing per key. + version_counter: AtomicU64, } impl FileCache { @@ -253,6 +265,7 @@ impl FileCache { LruEntryDeleter::new(shared), Self::MAX_LRU_TRACKER_CHANNEL_SIZE, ), + version_counter: AtomicU64::new(0), }) } @@ -346,21 +359,13 @@ impl AsyncWritableCache, CacheWriteError> for FileCache { async fn insert(&self, key: &K, value: Vec) -> Result<(), CacheWriteError> { - // Inserts are tricky. The first thing we'll do is find a new file handle for the new - // value. - let new_entry = CacheMapEntry { - fid: self - .file_generator - .fetch_add(1, std::sync::atomic::Ordering::Relaxed), - size_bytes: value.len(), - }; + let new_fid = self.file_generator.fetch_add(1, atomic::Ordering::Relaxed); + let new_size = value.len(); - while self.shared.size() + new_entry.size_bytes > self.max_size_bytes { + while self.shared.size() + new_size > self.max_size_bytes { if self.shared.size() == 0 { - // This means that the new entry is larger than the entire cache size limit. In - // this case, we have no choice but to just insert the entry and let the cache - // temporarily exceed the size limit. We will try to evict entries as soon as - // possible, but in the meantime, we need to at least insert this entry. + // The new entry is larger than the entire cache size limit. Insert it anyway + // and let the cache temporarily exceed the limit. break; } @@ -368,70 +373,77 @@ impl // realistically, I think I could've gotten away with just a plain Notify. It is what // it is now. - // The cache doesn't have any space for this new entry, so we need to evict some - // entries before we can insert this new entry. - // Before we can do that, we need to make sure there are no other pending culls. 
If we - // let multiple culls happen at the same time, we risk draining a lot more entries from - // the cache than necessary, which results in a regression. if self.lru_tracker.have_pending_culls() { - // If there are any pending culls, then we need to just wait. // TODO(markovejnovic): This could be nicer and maybe starve the CPU less. Chances // are we actually need to yield to the LRU tracker task. tokio::task::yield_now().await; continue; } - // There are no culls in progress, but the cache is still too full for this new entry, - // which means we need to evict some entries. if !self.lru_tracker.try_cull(Self::LRU_EVICTION_MAX_BATCH_SIZE) { tokio::task::yield_now().await; } } - let path = self.shared.path_for(new_entry.fid); + // Write file to disk. + let path = self.shared.path_for(new_fid); let mut new_file = self.create_file(&path).await?; let mut guard = FileGuard { path: Some(path) }; new_file.write_all(&value).await?; - let mut size_delta: isize = new_entry.size_bytes.cast_signed(); - - // Register the file in the map. After this point, the map owns the file and the guard - // must not delete it. - let old_entry = self.shared.map.upsert_async(*key, new_entry).await; - guard.path = None; - - if let Some(old_entry) = old_entry { - // If there was an old file ID, we need to delete the old file and notify the LRU - // tracker that the key was accessed. - self.lru_tracker.access(*key).await; - size_delta -= old_entry.size_bytes.cast_signed(); - - // TODO(markovejnovic): Could stack allocate the path. - // Note that the file already may be deleted at this point -- the LRU tracker deleter - // may have already deleted this file, so if the file doesn't exist, that's already - // fine with us. - if let Err(e) = self.shared.delete_entry_from_disk_async(&old_entry).await { - error!(error = ?e, key = ?key, "failed to delete old cache entry from disk"); + // Use entry_async to lock the bucket, then allocate version under the lock. + // This ensures version monotonicity per key matches the actual mutation order. + let (old_entry, new_version) = match self.shared.map.entry_async(*key).await { + scc::hash_map::Entry::Occupied(mut o) => { + let old = *o.get(); + let v = self.version_counter.fetch_add(1, atomic::Ordering::Relaxed); + *o.get_mut() = CacheMapEntry { + fid: new_fid, + size_bytes: new_size, + }; + (Some(old), v) } - } else { - self.lru_tracker - .insert(*key, DeleterCtx { fid: new_entry.fid }) - .await; - } + scc::hash_map::Entry::Vacant(vacant) => { + let v = self.version_counter.fetch_add(1, atomic::Ordering::Relaxed); + vacant.insert_entry(CacheMapEntry { + fid: new_fid, + size_bytes: new_size, + }); + (None, v) + } + }; + // Bucket lock released here. + guard.path = None; + // LRU notification (sync via try_send, non-cancellable). + self.lru_tracker.upsert( + *key, + DeleterCtx { + fid: new_fid, + version: new_version, + }, + ); + + // Size accounting (sync atomic, non-cancellable). 
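+        // Worked example: overwriting a 10240-byte entry with a 4096-byte value gives
+        // size_delta = 4096 - 10240 = -6144, so we fetch_sub(6144) below; a fresh insert
+        // of 4096 bytes gives size_delta = +4096 and a fetch_add(4096).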
+ let size_delta: isize = new_size.cast_signed() + - old_entry + .as_ref() + .map_or(0isize, |e| e.size_bytes.cast_signed()); if size_delta > 0 { - self.shared.size_bytes.fetch_add( - size_delta.cast_unsigned(), - std::sync::atomic::Ordering::Relaxed, - ); + self.shared + .size_bytes + .fetch_add(size_delta.cast_unsigned(), atomic::Ordering::Relaxed); } else if size_delta < 0 { - self.shared.size_bytes.fetch_sub( - size_delta.unsigned_abs(), - std::sync::atomic::Ordering::Relaxed, - ); + self.shared + .size_bytes + .fetch_sub(size_delta.unsigned_abs(), atomic::Ordering::Relaxed); + } + + // Deferrable: delete old file (safe to cancel — file is orphaned). + if let Some(old_entry) = old_entry { + drop(self.shared.delete_entry_from_disk_async(&old_entry).await); } - // Epic, we did it. Ok(()) } } diff --git a/lib/lib.rs b/lib/lib.rs index 058aae4..390f786 100644 --- a/lib/lib.rs +++ b/lib/lib.rs @@ -7,3 +7,4 @@ /// Caching primitives for git-fs. pub mod cache; pub mod io; +pub mod sync; diff --git a/lib/sync/atomic.rs b/lib/sync/atomic.rs new file mode 100644 index 0000000..2c77deb --- /dev/null +++ b/lib/sync/atomic.rs @@ -0,0 +1,13 @@ +//! Re-export of atomics, so that we can use loom's atomics when testing with loom. + +#[cfg(loom)] +pub use loom::sync::atomic::{ + AtomicBool, AtomicI8, AtomicI16, AtomicI32, AtomicI64, AtomicIsize, AtomicU8, AtomicU16, + AtomicU32, AtomicU64, AtomicUsize, Ordering, +}; + +#[cfg(not(loom))] +pub use std::sync::atomic::{ + AtomicBool, AtomicI8, AtomicI16, AtomicI32, AtomicI64, AtomicIsize, AtomicU8, AtomicU16, + AtomicU32, AtomicU64, AtomicUsize, Ordering, +}; diff --git a/lib/sync/mod.rs b/lib/sync/mod.rs new file mode 100644 index 0000000..32ce948 --- /dev/null +++ b/lib/sync/mod.rs @@ -0,0 +1,67 @@ +//! Synchronization primitives. +//! +//! Shims between loom and std synchronization primitives. 
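+//!
+//! Loom builds are selected the usual way for loom, via `RUSTFLAGS="--cfg loom"`; a
+//! regular build re-exports the `std` primitives unchanged.
+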
+pub mod atomic;
+
+#[cfg(loom)]
+use loom::cell::UnsafeCell;
+
+#[cfg(loom)]
+pub use loom::sync::{Arc, Condvar, Mutex, Once, RwLock, RwLockReadGuard, RwLockWriteGuard};
+
+#[cfg(loom)]
+pub struct OnceLock<T> {
+    once: Once,
+    value: UnsafeCell<Option<T>>,
+}
+
+#[cfg(loom)]
+unsafe impl<T: Send> Send for OnceLock<T> {}
+
+#[cfg(loom)]
+unsafe impl<T: Send + Sync> Sync for OnceLock<T> {}
+
+#[cfg(loom)]
+impl<T> OnceLock<T> {
+    pub fn new() -> Self {
+        Self {
+            once: Once::new(),
+            value: UnsafeCell::new(None),
+        }
+    }
+
+    pub fn get(&self) -> Option<&T> {
+        if self.once.is_completed() {
+            // Safety: once is completed, so value is initialized
+            // and will never be written to again
+            unsafe { (*self.value.with(|ptr| ptr)).as_ref() }
+        } else {
+            None
+        }
+    }
+
+    pub fn get_or_init(&self, f: impl FnOnce() -> T) -> &T {
+        self.once.call_once(|| unsafe {
+            self.value.with_mut(|ptr| {
+                *ptr = Some(f());
+            });
+        });
+        // Safety: call_once guarantees initialization is complete
+        unsafe { (*self.value.with(|ptr| ptr)).as_ref().unwrap() }
+    }
+
+    pub fn set(&self, value: T) -> Result<(), T> {
+        let mut value = Some(value);
+        self.once.call_once(|| {
+            let val = value.take().unwrap();
+            unsafe {
+                self.value.with_mut(|ptr| {
+                    *ptr = Some(val);
+                });
+            }
+        });
+        match value {
+            None => Ok(()),    // we consumed it — success
+            Some(v) => Err(v), // already initialized
+        }
+    }
+}
+
+#[cfg(not(loom))]
+pub use std::sync::{Arc, Condvar, Mutex, OnceLock, RwLock, RwLockReadGuard, RwLockWriteGuard};

From abfdc5eb8b79d8ff0c69e70df5a0b53419736d06 Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Sun, 15 Feb 2026 20:43:23 -0800
Subject: [PATCH 12/33] Fix three concurrency bugs in file cache

- DeletionIndicator::have_pending_work now checks != 0 (was >= 1<<32) so
  the eviction loop waits for in-flight deletions, not just pending
  batches, preventing cascading over-eviction. A DeletionGuard drop
  guard on spawned deletion tasks ensures observe_deletion() fires even
  on cancellation or panic.

- Size accounting moved inside the scc bucket lock in insert() to
  prevent transient AtomicUsize underflow when concurrent inserts to the
  same key interleave their deltas.

- access() changed from async send().await (cancellable, panics on
  channel close) to sync try_send + tokio::spawn fallback, matching the
  existing upsert() pattern. Removes the unreachable!() panic path
  during runtime shutdown.
---
 lib/cache/eviction/lru.rs | 46 ++++++++++++++++++++++++---------------
 lib/cache/fcache.rs       | 36 ++++++++++++++++--------------
 2 files changed, 48 insertions(+), 34 deletions(-)

diff --git a/lib/cache/eviction/lru.rs b/lib/cache/eviction/lru.rs
index 7a33cde..ef052d6 100644
--- a/lib/cache/eviction/lru.rs
+++ b/lib/cache/eviction/lru.rs
@@ -87,10 +87,10 @@ impl DeletionIndicator {
         self.underlying.fetch_sub(1, atomic::Ordering::Relaxed);
     }
 
-    /// Returns `true` if there are any pending batches (submitted but not yet processed by the
-    /// worker). Used by producers to avoid enqueuing redundant eviction requests.
-    fn have_pending_batches(&self) -> bool {
-        self.underlying.load(atomic::Ordering::Relaxed) >= 1 << 32
+    /// Returns `true` if there is any eviction work in progress — either batches waiting to be
+    /// processed by the worker, or individual deletions still in flight.
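+    /// Both counts share a single packed atomic, so one load observes a consistent
+    /// snapshot of the two.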
+    fn have_pending_work(&self) -> bool {
+        self.underlying.load(atomic::Ordering::Relaxed) != 0
     }
 }
 
@@ -237,8 +237,16 @@ impl
+                struct DeletionGuard(Arc<WorkerState>);
+                impl Drop for DeletionGuard {
+                    fn drop(&mut self) {
+                        self.0.active_deletions.observe_deletion();
+                    }
+                }
+                let _guard = DeletionGuard(shared_clone);
                 deleter.delete(key, ctx).await;
-                shared_clone.active_deletions.observe_deletion();
             });
         }
     }
@@ -320,15 +328,24 @@ impl
     /// Notify the LRU eviction tracker that the given key was accessed.
     ///
-    /// You MUST call this method for every read or update to a key.
-    pub async fn access(&self, key: K) {
-        self.send_msg(Message::Accessed(
+    /// Non-cancellable: uses `try_send` (sync) with a `tokio::spawn` fallback so that the
+    /// notification cannot be lost to task cancellation.
+    pub fn access(&self, key: K) {
+        let msg = Message::Accessed(
             key,
             self.worker_state
                 .eviction_generation
                 .load(atomic::Ordering::Relaxed),
-        ))
-        .await;
+        );
+        match self.worker_message_sender.try_send(msg) {
+            Ok(()) | Err(TrySendError::Closed(_)) => {}
+            Err(TrySendError::Full(msg)) => {
+                let sender = self.worker_message_sender.clone();
+                tokio::spawn(async move {
+                    let _ = sender.send(msg).await;
+                });
+            }
+        }
     }
 
     /// Try to cull the least recently used keys with the given deletion method.
@@ -358,13 +375,6 @@ impl
     /// Check whether there are culls that are already scheduled or actively in progress.
     #[must_use]
     pub fn have_pending_culls(&self) -> bool {
-        self.worker_state.active_deletions.have_pending_batches()
-    }
-
-    /// Send a message to the worker. This is a helper method to reduce code duplication.
-    async fn send_msg(&self, message: Message<K, Ctx>) {
-        if self.worker_message_sender.send(message).await.is_err() {
-            unreachable!();
-        }
+        self.worker_state.active_deletions.have_pending_work()
     }
 }
diff --git a/lib/cache/fcache.rs b/lib/cache/fcache.rs
index 2cfb58a..8974783 100644
--- a/lib/cache/fcache.rs
+++ b/lib/cache/fcache.rs
@@ -340,7 +340,7 @@ impl
         // Use entry_async to lock the bucket, then allocate version under the lock.
         // This ensures version monotonicity per key matches the actual mutation order.
+        // Size accounting is also done under the lock to prevent transient underflow
+        // when concurrent inserts to the same key interleave their deltas.
         let (old_entry, new_version) = match self.shared.map.entry_async(*key).await {
             scc::hash_map::Entry::Occupied(mut o) => {
                 let old = *o.get();
@@ -401,6 +403,18 @@ impl
                     fid: new_fid,
                     size_bytes: new_size,
                 };
+
+                let size_delta: isize = new_size.cast_signed() - old.size_bytes.cast_signed();
+                if size_delta > 0 {
+                    self.shared
+                        .size_bytes
+                        .fetch_add(size_delta.cast_unsigned(), atomic::Ordering::Relaxed);
+                } else if size_delta < 0 {
+                    self.shared
+                        .size_bytes
+                        .fetch_sub(size_delta.unsigned_abs(), atomic::Ordering::Relaxed);
+                }
+
                 (Some(old), v)
             }
             scc::hash_map::Entry::Vacant(vacant) => {
@@ -409,6 +423,11 @@ impl
                     fid: new_fid,
                     size_bytes: new_size,
                 });
+
+                self.shared
+                    .size_bytes
+                    .fetch_add(new_size, atomic::Ordering::Relaxed);
+
                 (None, v)
             }
         };
@@ -424,21 +443,6 @@ impl
             },
         );
 
-        // Size accounting (sync atomic, non-cancellable).
-        let size_delta: isize = new_size.cast_signed()
-            - old_entry
-                .as_ref()
-                .map_or(0isize, |e| e.size_bytes.cast_signed());
-        if size_delta > 0 {
-            self.shared
-                .size_bytes
-                .fetch_add(size_delta.cast_unsigned(), atomic::Ordering::Relaxed);
-        } else if size_delta < 0 {
-            self.shared
-                .size_bytes
-                .fetch_sub(size_delta.unsigned_abs(), atomic::Ordering::Relaxed);
-        }
-
         // Deferrable: delete old file (safe to cancel — file is orphaned).
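         // The map entry already points at the new fid, so no reader can re-observe the
         // old file; cancellation at this point only leaks an orphaned file on disk.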
if let Some(old_entry) = old_entry { drop(self.shared.delete_entry_from_disk_async(&old_entry).await); From 82f353a70fcc728b5c44dcca208f7896f8ecdb1b Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sun, 15 Feb 2026 21:05:25 -0800 Subject: [PATCH 13/33] Avoid an infinite loop --- lib/cache/fcache.rs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/lib/cache/fcache.rs b/lib/cache/fcache.rs index 8974783..2140ce6 100644 --- a/lib/cache/fcache.rs +++ b/lib/cache/fcache.rs @@ -197,6 +197,11 @@ impl FileCache { // See usage for reasoning on why this is necessary. const MAX_READ_RETRY_COUNT: usize = 8; + // The maximum number of eviction loop iterations before giving up and proceeding with the + // insert. Prevents livelock when the LRU worker's ordered map is empty (e.g., Upserted + // messages haven't been delivered yet) and try_cull succeeds but evicts nothing. + const MAX_EVICTION_ATTEMPTS: u32 = 4; + /// Try to create a new file cache at the given path. /// /// @@ -362,6 +367,7 @@ impl let new_fid = self.file_generator.fetch_add(1, atomic::Ordering::Relaxed); let new_size = value.len(); + let mut eviction_attempts = 0u32; while self.shared.size() + new_size > self.max_size_bytes { if self.shared.size() == 0 { // The new entry is larger than the entire cache size limit. Insert it anyway @@ -369,13 +375,15 @@ impl break; } - // TODO(markovejnovic): This whole spinlock situation sounded better in my head, but - // realistically, I think I could've gotten away with just a plain Notify. It is what - // it is now. + if eviction_attempts >= Self::MAX_EVICTION_ATTEMPTS { + // We've tried enough times. The cache is over budget but max_size_bytes is a + // soft hint -- proceed with the insert and let future inserts drive further + // eviction. + break; + } + eviction_attempts += 1; if self.lru_tracker.have_pending_culls() { - // TODO(markovejnovic): This could be nicer and maybe starve the CPU less. Chances - // are we actually need to yield to the LRU tracker task. 
tokio::task::yield_now().await; continue; } From 5dab7ead00ee41943b420b70dbbe7432850118aa Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sun, 15 Feb 2026 22:31:28 -0800 Subject: [PATCH 14/33] remove broken code --- Cargo.lock | 35 --------------- Cargo.toml | 5 +-- lib/cache/eviction/lru.rs | 89 ++++----------------------------------- lib/cache/fcache.rs | 5 ++- lib/lib.rs | 1 - lib/sync/atomic.rs | 13 ------ lib/sync/mod.rs | 67 ----------------------------- 7 files changed, 13 insertions(+), 202 deletions(-) delete mode 100644 lib/sync/atomic.rs delete mode 100644 lib/sync/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 7cbaf5b..ebbe571 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -718,21 +718,6 @@ dependencies = [ "thread_local", ] -[[package]] -name = "generator" -version = "0.8.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52f04ae4152da20c76fe800fa48659201d5cf627c5149ca0b707b69d7eef6cf9" -dependencies = [ - "cc", - "cfg-if", - "libc", - "log", - "rustversion", - "windows-link", - "windows-result", -] - [[package]] name = "getrandom" version = "0.2.17" @@ -792,7 +777,6 @@ dependencies = [ "http", "inquire", "libc", - "loom", "mesa-dev", "nix", "num-traits", @@ -1320,19 +1304,6 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" -[[package]] -name = "loom" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" -dependencies = [ - "cfg-if", - "generator", - "scoped-tls", - "tracing", - "tracing-subscriber", -] - [[package]] name = "lru-slab" version = "0.1.2" @@ -2148,12 +2119,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "scoped-tls" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" - [[package]] name = "scopeguard" version = "1.2.0" diff --git a/Cargo.toml b/Cargo.toml index 6f777d9..399c11b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,9 +52,6 @@ opentelemetry-otlp = { version = "0.29", default-features = false, features = [" tracing-opentelemetry = { version = "0.30" } hashlink = "0.11.0" -[target.'cfg(loom)'.dependencies] -loom = "0.7" - [features] default = [] staging = [] @@ -63,7 +60,7 @@ staging = [] vergen-gitcl = { version = "1", features = [] } [lints.rust] -unexpected_cfgs = { level = "warn", check-cfg = ['cfg(loom)'] } +unexpected_cfgs = "warn" unsafe_code = "allow" missing_docs = "warn" unreachable_pub = "allow" diff --git a/lib/cache/eviction/lru.rs b/lib/cache/eviction/lru.rs index ef052d6..b48f666 100644 --- a/lib/cache/eviction/lru.rs +++ b/lib/cache/eviction/lru.rs @@ -2,7 +2,10 @@ use std::{future::Future, hash::Hash}; -use crate::sync::{Arc, OnceLock, atomic, atomic::AtomicI64, atomic::AtomicUsize}; +use std::sync::{ + Arc, OnceLock, + atomic::{self, AtomicI64}, +}; use hashlink::LinkedHashMap; use tokio::{ @@ -29,7 +32,7 @@ pub trait Deleter: Send + Clone + 'static { #[derive(Debug, Clone, Copy)] enum Message { /// Notify the LRU eviction tracker that the given key was accessed. - Accessed(K, usize), + Accessed(K), /// Request an eviction set of the given size. Evict(u32), /// Notify the LRU eviction tracker that a key was inserted or overwritten. 
@@ -142,82 +145,14 @@ impl) -> bool { match message { - Message::Accessed(k, ev_gen) => { - if ev_gen - < self - .shared - .eviction_generation - .load(atomic::Ordering::Relaxed) - { - // This is a ghost access. Happens when one client sends an eviction, but - // other clients add accesses to the same keys, before our worker thread had - // a chance to clean up. - return true; - } - + Message::Accessed(k) => { // The key may have been evicted between the access and this message arriving. + // If it was, the remove returns None and this is a no-op. if let Some(entry) = self.ordered_key_map.remove(&k) { self.ordered_key_map.insert(k, entry); } } Message::Evict(max_count) => { - // We got an eviction notice. Bump the eviction generation to indicate that all - // accesses after this point are considered in the "future". Read the subsequent - // comment for an explanation of why we need to do this. - self.shared - .eviction_generation - .fetch_add(1, atomic::Ordering::Relaxed); - // Before we send off the eviction set, we actually need to remove the evicted - // keys from the pending queue. Read the following for an explanation. - // - // The client may access a key AFTER an eviction notice. Until the eviction - // notice is processed by the client, the client will have the key available to - // it. - // - // Consequently, the client may very well access the key after it sent out an - // eviction notice, but before the eviction notice is processed. This will - // result in the key being added as an access message after the eviction - // notice, but before the eviction set is sent to the client. - // - // The problem is, the LruProcessingTask has no way of knowing whether the - // message the client sent is a stale message, or a legit message. - // - // Consider the following queue state in between two loop iterations of the - // LruProcessingTask: - // A -- access - // B -- insert - // E -- eviction request - // C -- the client thread which made the request - // - // A1 A2 A3 E1 A1 B1 -- after this point, no clients can access A1 - // C1 C1 C1 C1 C2 C1 ---------------------------------------------> time - // - // The scenario here is: - // - A1, A2, A3 are used by the clients. - // - C1 wants to add B1, but it doesn't have enough space, so it sends an - // eviction request. - // - In parallel, C2 accesses A1, which is completely legal. - // - // The result is that our queue has A1, even after we sent out the eviction - // request, and we have no way of knowing whether A1 is a stale message or not. - // - // To prevent this whole "is this thing stale or not" problem, we need to - // "erase" keys evicted in the future. Unfortunately, you can't mutate queues, - // so the best we can do is simply mark the stupid keys as "evicted" and ignore - // any accesses from these keys until they're marked as "inserted" again. - // - // This is what the generation counter does -- it allows us to track which - // messages are considered "stale". There are some problems with the generation - // counter, however. We drop all messages that are "generationally old". Under - // high contention, this results in the eviction policy acting like a fuzzy LRU - // policy, rather than a strict LRU policy, but for the purposes of this cache, - // this is an acceptable tradeoff. - // - // TODO(markovejnovic): Make this a strict LRU. 
- // - // We cannot mutate the queue to mark messages as stale, so the best we can do - // is mark the keys as "evicted" and ignore any accesses from these keys until - // they're marked as "inserted" again. { // These integer casts are safe, since max_count is guaranteed to fit // within a u32, min(MAX_U32, MAX_USIZE) == MAX_U32. @@ -272,7 +207,6 @@ struct WorkerState { /// Packed atomic tracking pending eviction batches (upper 32 bits) and active deletions /// (lower 32 bits). See `DeletionIndicator` for the full protocol. active_deletions: DeletionIndicator, - eviction_generation: AtomicUsize, worker: OnceLock>, } @@ -293,7 +227,6 @@ impl let worker_state = Arc::new(WorkerState { active_deletions: DeletionIndicator::new(), - eviction_generation: AtomicUsize::new(0), worker: OnceLock::new(), }); let worker = LruProcessingTask::spawn_task(deleter, rx, Arc::clone(&worker_state)); @@ -331,13 +264,7 @@ impl /// Non-cancellable: uses `try_send` (sync) with a `tokio::spawn` fallback so that the /// notification cannot be lost to task cancellation. pub fn access(&self, key: K) { - let msg = Message::Accessed( - key, - self.worker_state - .eviction_generation - .load(atomic::Ordering::Relaxed), - ); - match self.worker_message_sender.try_send(msg) { + match self.worker_message_sender.try_send(Message::Accessed(key)) { Ok(()) | Err(TrySendError::Closed(_)) => {} Err(TrySendError::Full(msg)) => { let sender = self.worker_message_sender.clone(); diff --git a/lib/cache/fcache.rs b/lib/cache/fcache.rs index 2140ce6..a68cce2 100644 --- a/lib/cache/fcache.rs +++ b/lib/cache/fcache.rs @@ -6,7 +6,10 @@ use std::{ path::{Path, PathBuf}, }; -use crate::sync::{Arc, atomic, atomic::AtomicU64, atomic::AtomicUsize}; +use std::sync::{ + Arc, + atomic::{self, AtomicU64, AtomicUsize}, +}; use tracing::error; diff --git a/lib/lib.rs b/lib/lib.rs index 390f786..058aae4 100644 --- a/lib/lib.rs +++ b/lib/lib.rs @@ -7,4 +7,3 @@ /// Caching primitives for git-fs. pub mod cache; pub mod io; -pub mod sync; diff --git a/lib/sync/atomic.rs b/lib/sync/atomic.rs deleted file mode 100644 index 2c77deb..0000000 --- a/lib/sync/atomic.rs +++ /dev/null @@ -1,13 +0,0 @@ -//! Re-export of atomics, so that we can use loom's atomics when testing with loom. - -#[cfg(loom)] -pub use loom::sync::atomic::{ - AtomicBool, AtomicI8, AtomicI16, AtomicI32, AtomicI64, AtomicIsize, AtomicU8, AtomicU16, - AtomicU32, AtomicU64, AtomicUsize, Ordering, -}; - -#[cfg(not(loom))] -pub use std::sync::atomic::{ - AtomicBool, AtomicI8, AtomicI16, AtomicI32, AtomicI64, AtomicIsize, AtomicU8, AtomicU16, - AtomicU32, AtomicU64, AtomicUsize, Ordering, -}; diff --git a/lib/sync/mod.rs b/lib/sync/mod.rs deleted file mode 100644 index 32ce948..0000000 --- a/lib/sync/mod.rs +++ /dev/null @@ -1,67 +0,0 @@ -//! Synchronization primitives. -//! -//! Shims between loom and std synchronization primitives. 
-pub mod atomic; - -#[cfg(loom)] -pub use loom::sync::{Arc, Condvar, Mutex, Once, RwLock, RwLockReadGuard, RwLockWriteGuard}; - -#[cfg(loom)] -pub struct OnceLock { - once: Once, -} - -#[cfg(loom)] -unsafe impl Send for OnceLock {} - -#[cfg(loom)] -unsafe impl Sync for OnceLock {} - -#[cfg(loom)] -impl OnceLock { - pub fn new() -> Self { - Self { - once: Once::new(), - value: UnsafeCell::new(None), - } - } - - pub fn get(&self) -> Option<&T> { - if self.once.is_completed() { - // Safety: once is completed, so value is initialized - // and will never be written to again - unsafe { (*self.value.with(|ptr| ptr)).as_ref() } - } else { - None - } - } - - pub fn get_or_init(&self, f: impl FnOnce() -> T) -> &T { - self.once.call_once(|| unsafe { - self.value.with_mut(|ptr| { - *ptr = Some(f()); - }); - }); - // Safety: call_once guarantees initialization is complete - unsafe { (*self.value.with(|ptr| ptr)).as_ref().unwrap() } - } - - pub fn set(&self, value: T) -> Result<(), T> { - let mut value = Some(value); - self.once.call_once(|| { - let val = value.take().unwrap(); - unsafe { - self.value.with_mut(|ptr| { - *ptr = Some(val); - }); - } - }); - match value { - None => Ok(()), // we consumed it — success - Some(v) => Err(v), // already initialized - } - } -} - -#[cfg(not(loom))] -pub use std::sync::{Arc, Condvar, Mutex, OnceLock, RwLock, RwLockReadGuard, RwLockWriteGuard}; From b282b3bc83d727f818551197b120fcf58fbac0e7 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sun, 15 Feb 2026 23:11:11 -0800 Subject: [PATCH 15/33] more cleanup --- lib/cache/eviction/lru.rs | 30 ++++++++++-------------------- lib/cache/fcache.rs | 8 ++++++-- lib/lib.rs | 1 - 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/lib/cache/eviction/lru.rs b/lib/cache/eviction/lru.rs index b48f666..ff56b2c 100644 --- a/lib/cache/eviction/lru.rs +++ b/lib/cache/eviction/lru.rs @@ -3,15 +3,12 @@ use std::{future::Future, hash::Hash}; use std::sync::{ - Arc, OnceLock, + Arc, atomic::{self, AtomicI64}, }; use hashlink::LinkedHashMap; -use tokio::{ - sync::mpsc::{Receiver, error::TrySendError}, - task::JoinHandle, -}; +use tokio::sync::mpsc::{Receiver, error::TrySendError}; /// Types that carry a monotonic version for deduplication of out-of-order messages. pub trait Versioned { @@ -22,9 +19,9 @@ pub trait Versioned { /// A trait for deleting keys from the cache. This is used by the LRU eviction tracker to delete /// keys when they are evicted. pub trait Deleter: Send + Clone + 'static { - /// Delete the given keys from the cache. The keys are guaranteed to be in the order of - /// eviction. You absolutely MUST delete the keys in the order they are given, otherwise the - /// LRU eviction tracker will get very confused and break. + /// Delete the given key from the cache. Deletions for different keys may be invoked + /// concurrently and in arbitrary order. Correctness is ensured by the caller through + /// a context-based guard (e.g. matching on a unique file ID), not by invocation order. fn delete(&mut self, key: K, ctx: Ctx) -> impl Future + Send; } @@ -123,16 +120,12 @@ impl>, - shared: Arc, - ) -> JoinHandle<()> { + fn spawn_task(deleter: D, receiver: Receiver>, shared: Arc) { // TODO(markovejnovic): This should have a best-effort drop. 
tokio::spawn(async move { let mut task = Self::new(deleter, receiver, shared); task.work().await; - }) + }); } async fn work(&mut self) { @@ -207,7 +200,6 @@ struct WorkerState { /// Packed atomic tracking pending eviction batches (upper 32 bits) and active deletions /// (lower 32 bits). See `DeletionIndicator` for the full protocol. active_deletions: DeletionIndicator, - worker: OnceLock>, } /// An LRU eviction tracker. This is used to track the least recently used keys in the cache, and @@ -227,12 +219,10 @@ impl let worker_state = Arc::new(WorkerState { active_deletions: DeletionIndicator::new(), - worker: OnceLock::new(), }); - let worker = LruProcessingTask::spawn_task(deleter, rx, Arc::clone(&worker_state)); - if worker_state.worker.set(worker).is_err() { - unreachable!("worker should only be set once, and we just set it"); - } + // The worker task is intentionally detached: it exits when the channel closes + // (i.e. when all senders, including the one held by LruEvictionTracker, are dropped). + LruProcessingTask::spawn_task(deleter, rx, Arc::clone(&worker_state)); Self { worker_message_sender: tx, diff --git a/lib/cache/fcache.rs b/lib/cache/fcache.rs index a68cce2..8d69f1a 100644 --- a/lib/cache/fcache.rs +++ b/lib/cache/fcache.rs @@ -396,10 +396,14 @@ impl } } - // Write file to disk. + // Write file to disk. The guard is constructed *before* the `.await` so that if + // this future is cancelled while `create_file` is in flight, the drop guard still + // cleans up the (potentially created) file. let path = self.shared.path_for(new_fid); + let mut guard = FileGuard { + path: Some(path.clone()), + }; let mut new_file = self.create_file(&path).await?; - let mut guard = FileGuard { path: Some(path) }; new_file.write_all(&value).await?; // Use entry_async to lock the bucket, then allocate version under the lock. diff --git a/lib/lib.rs b/lib/lib.rs index 058aae4..34ca807 100644 --- a/lib/lib.rs +++ b/lib/lib.rs @@ -2,7 +2,6 @@ #![feature(negative_impls)] #![feature(with_negative_coherence)] -#![feature(fn_traits)] /// Caching primitives for git-fs. pub mod cache; From 5bda38ae4a4387a5481870a58534cb53685ac24c Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Sun, 15 Feb 2026 23:54:02 -0800 Subject: [PATCH 16/33] fix: remove nightly feature flags by deleting unused trait abstractions The unified ReadableCache/WritableCache traits and Sync* variants had no consumers outside traits.rs itself. Remove them along with the negative impls that required nightly, keeping only AsyncReadableCache and AsyncWritableCache which FileCache actually implements. --- lib/cache/traits.rs | 213 ++------------------------------------------ lib/lib.rs | 3 - 2 files changed, 8 insertions(+), 208 deletions(-) diff --git a/lib/cache/traits.rs b/lib/cache/traits.rs index 9a3d7ac..5a38464 100644 --- a/lib/cache/traits.rs +++ b/lib/cache/traits.rs @@ -1,176 +1,25 @@ -use std::{future::Future, hash::Hash, pin::Pin}; +use std::hash::Hash; -/// A readable cache is a cache that can be read from, but not necessarily written to. -/// -/// This trait is designed to be a generic trait for any cache. When using a cache as a generic -/// parameter, this is the correct trait to use. -/// -/// You should avoid manually implementing this trait, and prefer implementing the -/// `SyncReadableCache` or `AsyncReadableCache` traits instead, which will automatically implement -/// this trait for you. -pub trait ReadableCache -where - K: Eq + Hash, - V: Clone, -{ - /// The future type returned by the `get` and `contains` methods. 
This is used to allow - /// zero-cost synchronous and asynchronous implementations of the `ReadableCache` trait. - type Fut<'a, T: 'a>: Future + 'a - where - Self: 'a, - K: 'a, - V: 'a; - - /// Fetch the value associated with the given key from the cache. If the key is not present in - /// the cache, return `None`. - fn get<'a>(&'a self, key: &'a K) -> Self::Fut<'a, Option> - where - V: 'a; - - /// Check if the cache contains the given key. - fn contains<'a>(&'a self, key: &'a K) -> Self::Fut<'a, bool> - where - V: 'a; -} - -/// Convenience trait for implementing `ReadableCache` with synchronous cache accesses. -/// -/// Avoid using this cache as a generic parameter. `ReadableCache` is the correct trait to use for -/// generic parameters, and will support both synchronous and asynchronous implementations of the -/// cache. -pub trait SyncReadableCache -where - K: Eq + Hash, - V: Clone, -{ - /// See `ReadableCache::get`. - fn get(&self, key: &K) -> Option; - - /// See `ReadableCache::contains`. - fn contains(&self, key: &K) -> bool; -} - -/// Convenience trait for implementing `ReadableCache` with asynchronous cache accesses. -/// -/// Avoid using this cache as a generic parameter. `ReadableCache` is the correct trait to use for -/// generic parameters, and will support both synchronous and asynchronous implementations of the -/// cache. +/// A readable cache that can be read from asynchronously. #[expect(async_fn_in_trait)] pub trait AsyncReadableCache where K: Eq + Hash, V: Clone, { - /// See `ReadableCache::get`. + /// Fetch the value associated with the given key from the cache. If the key is not present in + /// the cache, return `None`. async fn get(&self, key: &K) -> Option; - /// See `ReadableCache::contains`. + /// Check if the cache contains the given key. async fn contains(&self, key: &K) -> bool; } -impl> !SyncReadableCache for T {} - -impl> ReadableCache for T { - type Fut<'a, O: 'a> - = Pin + 'a>> - where - Self: 'a, - K: 'a, - V: 'a; - - fn get<'a>(&'a self, key: &'a K) -> Self::Fut<'a, Option> - where - V: 'a, - { - Box::pin(AsyncReadableCache::get(self, key)) - } - - fn contains<'a>(&'a self, key: &'a K) -> Self::Fut<'a, bool> - where - V: 'a, - { - Box::pin(AsyncReadableCache::contains(self, key)) - } -} - -impl> ReadableCache for T { - type Fut<'a, O: 'a> - = std::future::Ready - where - Self: 'a, - K: 'a, - V: 'a; - - fn get<'a>(&'a self, key: &'a K) -> Self::Fut<'a, Option> - where - V: 'a, - { - std::future::ready(SyncReadableCache::get(self, key)) - } - - fn contains<'a>(&'a self, key: &'a K) -> Self::Fut<'a, bool> - where - V: 'a, - { - std::future::ready(SyncReadableCache::contains(self, key)) - } -} - -/// A writable cache is a cache that can be written to, but not necessarily read from. +/// A writable cache that can be written to asynchronously. /// -/// Note that this trait also does not require the cache support manual eviction of entries. Many +/// Note that this trait does not require the cache to support manual eviction of entries. Many /// caches only support automatic eviction (such as LRU eviction), and this trait is designed to /// support those caches as well. -/// -/// This trait is designed to be a generic trait for any cache. When using a cache as a generic -/// parameter, this is the correct trait to use. -/// -/// You should avoid manually implementing this trait, and prefer implementing the -/// `SyncWritableCache` or `AsyncWritableCache` traits instead, which will automatically implement -/// this trait for you. 
-pub trait WritableCache -where - K: Eq + Hash, - V: Clone, - E: std::error::Error, -{ - /// The future type returned by the `insert` method. This is used to allow zero-cost - /// synchronous and asynchronous implementations of the `WritableCache` trait. - type Fut<'a, T: 'a>: Future + 'a - where - Self: 'a, - K: 'a, - V: 'a, - E: 'a; - - /// Insert the given value into the cache with the given key. If the key was already present in - /// the cache, return the old value. Otherwise, return `None`. - fn insert<'a>(&'a self, key: &'a K, value: V) -> Self::Fut<'a, Result<(), E>> - where - V: 'a; -} - -/// Convenience trait for implementing `WritableCache` with synchronous cache accesses. -/// -/// Avoid using this cache as a generic parameter. `WritableCache` is the correct trait to use for -/// generic parameters, and will support both synchronous and asynchronous implementations of the -/// cache. -pub trait SyncWritableCache -where - K: Eq + Hash, - V: Clone, - E: std::error::Error, -{ - /// Insert the given value into the cache with the given key. If the key was already present in - /// the cache, return the old value. Otherwise, return `None`. - fn insert(&self, key: &K, value: V) -> Result<(), E>; -} - -/// Convenience trait for implementing `WritableCache` with asynchronous cache accesses. -/// -/// Avoid using this cache as a generic parameter. `WritableCache` is the correct trait to use for -/// generic parameters, and will support both synchronous and asynchronous implementations of the -/// cache. #[expect(async_fn_in_trait)] pub trait AsyncWritableCache where @@ -178,52 +27,6 @@ where V: Clone, E: std::error::Error, { - /// Insert the given value into the cache with the given key. If the key was already present in - /// the cache, return the old value. Otherwise, return `None`. + /// Insert the given value into the cache with the given key. async fn insert(&self, key: &K, value: V) -> Result<(), E>; } - -impl> - !SyncWritableCache for T -{ -} - -impl> - WritableCache for T -{ - type Fut<'a, O: 'a> - = Pin + 'a>> - where - Self: 'a, - K: 'a, - V: 'a, - E: 'a; - - fn insert<'a>(&'a self, key: &'a K, value: V) -> Self::Fut<'a, Result<(), E>> - where - V: 'a, - E: 'a, - { - Box::pin(AsyncWritableCache::insert(self, key, value)) - } -} - -impl> - WritableCache for T -{ - type Fut<'a, O: 'a> - = std::future::Ready - where - Self: 'a, - K: 'a, - V: 'a, - E: 'a; - - fn insert<'a>(&'a self, key: &'a K, value: V) -> Self::Fut<'a, Result<(), E>> - where - V: 'a, - E: 'a, - { - std::future::ready(SyncWritableCache::insert(self, key, value)) - } -} diff --git a/lib/lib.rs b/lib/lib.rs index 34ca807..f7388bd 100644 --- a/lib/lib.rs +++ b/lib/lib.rs @@ -1,8 +1,5 @@ //! git-fs shared library. -#![feature(negative_impls)] -#![feature(with_negative_coherence)] - /// Caching primitives for git-fs. 
 pub mod cache;
 pub mod io;

From 6d497260553d1c71cec6e2f34bfc623b11b3778d Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Mon, 16 Feb 2026 00:09:42 -0800
Subject: [PATCH 17/33] chore: add tempfile dev-dependency for integration
 tests

---
 Cargo.lock | 1 +
 Cargo.toml | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/Cargo.lock b/Cargo.lock
index ebbe571..1002ac9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -795,6 +795,7 @@ dependencies = [
  "serde_json",
  "serde_path_to_error",
  "shellexpand",
+ "tempfile",
  "thiserror 2.0.18",
  "tokio",
  "toml",
diff --git a/Cargo.toml b/Cargo.toml
index 399c11b..9cd0c59 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -56,6 +56,9 @@ hashlink = "0.11.0"
 default = []
 staging = []
 
+[dev-dependencies]
+tempfile = "3"
+
 [build-dependencies]
 vergen-gitcl = { version = "1", features = [] }

From 7edb22f7a12060210904c35357e55f3bce2ffa00 Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Mon, 16 Feb 2026 00:10:40 -0800
Subject: [PATCH 18/33] test: add shared mock deleter and helpers for LRU
 tests

---
 tests/common/mod.rs | 54 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 tests/common/mod.rs

diff --git a/tests/common/mod.rs b/tests/common/mod.rs
new file mode 100644
index 0000000..d7d0cea
--- /dev/null
+++ b/tests/common/mod.rs
@@ -0,0 +1,54 @@
+#![allow(missing_docs, clippy::unwrap_used)]
+
+use std::sync::{Arc, Mutex};
+use std::time::Duration;
+
+use git_fs::cache::eviction::lru::{Deleter, LruEvictionTracker, Versioned};
+
+/// Minimal versioned context for LRU tests.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct MockCtx {
+    pub version: u64,
+}
+
+impl Versioned for MockCtx {
+    fn version(&self) -> u64 {
+        self.version
+    }
+}
+
+/// A mock deleter that records every (key, ctx) pair it receives.
+#[derive(Clone)]
+pub struct MockDeleter {
+    pub deleted: Arc<Mutex<Vec<(u64, MockCtx)>>>,
+}
+
+impl MockDeleter {
+    pub fn new() -> Self {
+        Self {
+            deleted: Arc::new(Mutex::new(Vec::new())),
+        }
+    }
+
+    /// Returns just the keys that were deleted, in deletion order.
+    pub fn deleted_keys(&self) -> Vec<u64> {
+        self.deleted.lock().unwrap().iter().map(|(k, _)| *k).collect()
+    }
+}
+
+impl Deleter<u64, MockCtx> for MockDeleter {
+    async fn delete(&mut self, key: u64, ctx: MockCtx) {
+        self.deleted.lock().unwrap().push((key, ctx));
+    }
+}
+
+/// Poll `have_pending_culls()` until it returns false, or panic after timeout.
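+///
+/// A sketch of the intended call pattern, using the helpers above:
+///
+/// ```ignore
+/// let deleter = MockDeleter::new();
+/// let tracker = LruEvictionTracker::spawn(deleter.clone(), 64);
+/// tracker.upsert(1, MockCtx { version: 1 });
+/// assert!(tracker.try_cull(1)); // messages are FIFO, so the upsert lands first
+/// wait_for_culls(&tracker).await;
+/// assert_eq!(deleter.deleted_keys(), vec![1]);
+/// ```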
+pub async fn wait_for_culls(tracker: &LruEvictionTracker<u64, MockCtx>) {
+    for _ in 0..200 {
+        if !tracker.have_pending_culls() {
+            return;
+        }
+        tokio::time::sleep(Duration::from_millis(5)).await;
+    }
+    panic!("culls did not complete within 1 second");
+}

From 9dd570b0df2c263de5b1a24bc50606946033b33d Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Mon, 16 Feb 2026 00:13:11 -0800
Subject: [PATCH 19/33] test: add FileCache construction tests

---
 tests/fcache_correctness.rs | 77 +++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 tests/fcache_correctness.rs

diff --git a/tests/fcache_correctness.rs b/tests/fcache_correctness.rs
new file mode 100644
index 0000000..1814d12
--- /dev/null
+++ b/tests/fcache_correctness.rs
@@ -0,0 +1,77 @@
+#![allow(clippy::unwrap_used, missing_docs)]
+
+use git_fs::cache::fcache::{FileCache, InvalidRootPathError};
+use git_fs::cache::traits::{AsyncReadableCache as _, AsyncWritableCache as _};
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn new_creates_directory_and_marker() {
+    let tmp = tempfile::tempdir().unwrap();
+    let cache_path = tmp.path().join("cache");
+
+    let _cache = FileCache::<u64>::new(&cache_path, 4096).await.unwrap();
+
+    assert!(cache_path.join(".gitfs_cache").exists());
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn new_with_existing_empty_directory() {
+    let tmp = tempfile::tempdir().unwrap();
+    // tmp.path() is an existing empty directory
+    let _cache = FileCache::<u64>::new(tmp.path(), 4096).await.unwrap();
+
+    assert!(tmp.path().join(".gitfs_cache").exists());
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn new_rejects_file_path() {
+    let tmp = tempfile::tempdir().unwrap();
+    let file_path = tmp.path().join("not_a_dir");
+    std::fs::write(&file_path, b"hello").unwrap();
+
+    let result = FileCache::<u64>::new(&file_path, 4096).await;
+
+    assert!(
+        matches!(result, Err(InvalidRootPathError::NotADirectory(_))),
+        "expected NotADirectory, got {:?}",
+        result.as_ref().map(|_| "Ok(...)").map_err(|e| format!("{e:?}"))
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn new_rejects_non_empty_unmarked_directory() {
+    let tmp = tempfile::tempdir().unwrap();
+    // Place a foreign file so the directory is non-empty and has no marker
+    std::fs::write(tmp.path().join("foreign.txt"), b"data").unwrap();
+
+    let result = FileCache::<u64>::new(tmp.path(), 4096).await;
+
+    assert!(
+        matches!(result, Err(InvalidRootPathError::RootPathUnsafeCache(_))),
+        "expected RootPathUnsafeCache, got {:?}",
+        result.as_ref().map(|_| "Ok(...)").map_err(|e| format!("{e:?}"))
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn new_cleans_previously_used_directory() {
+    let tmp = tempfile::tempdir().unwrap();
+    let cache_path = tmp.path().join("cache");
+
+    // First use: create cache and insert a value
+    {
+        let cache = FileCache::<u64>::new(&cache_path, 4096).await.unwrap();
+        cache.insert(&1, b"value-1".to_vec()).await.unwrap();
+        // FileCache::drop removes the directory
+    }
+
+    // Recreate the directory with a marker to simulate a restart
+    std::fs::create_dir_all(&cache_path).unwrap();
+    std::fs::write(cache_path.join(".gitfs_cache"), b"").unwrap();
+    std::fs::write(cache_path.join("leftover"), b"stale").unwrap();
+
+    // Second use: should clean old contents
+    let cache = FileCache::<u64>::new(&cache_path, 4096).await.unwrap();
+
+    // Old data should not be accessible
+    assert!(cache.get(&1).await.is_none());
+}

From 
649ec56c73dd1200ccfbc1573d641dc9d24c277f Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Mon, 16 Feb 2026 00:14:18 -0800
Subject: [PATCH 20/33] test: add FileCache read/write tests

---
 tests/fcache_correctness.rs | 54 +++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/tests/fcache_correctness.rs b/tests/fcache_correctness.rs
index 1814d12..05b1265 100644
--- a/tests/fcache_correctness.rs
+++ b/tests/fcache_correctness.rs
@@ -75,3 +75,57 @@ async fn new_cleans_previously_used_directory() {
     // Old data should not be accessible
     assert!(cache.get(&1).await.is_none());
 }
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn insert_and_get_returns_value() {
+    let tmp = tempfile::tempdir().unwrap();
+    let cache = FileCache::<u64>::new(tmp.path(), 4096).await.unwrap();
+
+    cache.insert(&42, b"hello world".to_vec()).await.unwrap();
+
+    let val = cache.get(&42).await;
+    assert_eq!(val.as_deref(), Some(b"hello world".as_slice()));
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn get_missing_key_returns_none() {
+    let tmp = tempfile::tempdir().unwrap();
+    let cache = FileCache::<u64>::new(tmp.path(), 4096).await.unwrap();
+
+    assert!(cache.get(&999).await.is_none());
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn contains_reflects_presence() {
+    let tmp = tempfile::tempdir().unwrap();
+    let cache = FileCache::<u64>::new(tmp.path(), 4096).await.unwrap();
+
+    assert!(!cache.contains(&1).await);
+
+    cache.insert(&1, b"data".to_vec()).await.unwrap();
+
+    assert!(cache.contains(&1).await);
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn insert_overwrites_previous_value() {
+    let tmp = tempfile::tempdir().unwrap();
+    let cache = FileCache::<u64>::new(tmp.path(), 4096).await.unwrap();
+
+    cache.insert(&1, b"first".to_vec()).await.unwrap();
+    cache.insert(&1, b"second".to_vec()).await.unwrap();
+
+    let val = cache.get(&1).await;
+    assert_eq!(val.as_deref(), Some(b"second".as_slice()));
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn insert_empty_value() {
+    let tmp = tempfile::tempdir().unwrap();
+    let cache = FileCache::<u64>::new(tmp.path(), 4096).await.unwrap();
+
+    cache.insert(&1, Vec::new()).await.unwrap();
+
+    let val = cache.get(&1).await;
+    assert_eq!(val.as_deref(), Some(b"".as_slice()));
+}

From 9b63eff3572b9c1102ea4ba0a388ba4a6ea0e50e Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Mon, 16 Feb 2026 00:15:21 -0800
Subject: [PATCH 21/33] test: add FileCache eviction and cleanup tests

---
 tests/fcache_correctness.rs | 58 +++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/tests/fcache_correctness.rs b/tests/fcache_correctness.rs
index 05b1265..df6e2f3 100644
--- a/tests/fcache_correctness.rs
+++ b/tests/fcache_correctness.rs
@@ -129,3 +129,61 @@ async fn insert_empty_value() {
     let val = cache.get(&1).await;
     assert_eq!(val.as_deref(), Some(b"".as_slice()));
 }
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn eviction_removes_oldest_entries() {
+    // max_size = 100 bytes. Insert 5 × 30-byte entries (150 bytes total).
+    // After the last insert, eviction should have removed at least one early entry.
+    let tmp = tempfile::tempdir().unwrap();
+    let cache = FileCache::<u64>::new(tmp.path(), 100).await.unwrap();
+
+    for i in 0u64..5 {
+        let value = vec![b'x'; 30];
+        cache.insert(&i, value).await.unwrap();
+    }
+
+    // Give the async eviction worker time to complete deletions.
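+    // (A fixed sleep is a heuristic rather than a synchronization point, which is why
+    // the assertion below only requires that *some* entry was evicted.)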
+    tokio::time::sleep(std::time::Duration::from_millis(200)).await;
+
+    // At least one of the earliest keys should have been evicted.
+    let mut present_count = 0;
+    for i in 0u64..5 {
+        if cache.get(&i).await.is_some() {
+            present_count += 1;
+        }
+    }
+
+    assert!(
+        present_count < 5,
+        "expected at least one eviction, but all 5 entries are still present"
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn entry_larger_than_max_size_still_inserted() {
+    let tmp = tempfile::tempdir().unwrap();
+    let cache = FileCache::<u64>::new(tmp.path(), 10).await.unwrap();
+
+    // Value is 50 bytes, far exceeding max_size of 10.
+    let big_value = vec![b'A'; 50];
+    cache.insert(&1, big_value.clone()).await.unwrap();
+
+    let val = cache.get(&1).await;
+    assert_eq!(val, Some(big_value));
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn drop_removes_cache_directory() {
+    let tmp = tempfile::tempdir().unwrap();
+    let cache_path = tmp.path().join("cache");
+
+    {
+        let cache = FileCache::<u64>::new(&cache_path, 4096).await.unwrap();
+        cache.insert(&1, b"data".to_vec()).await.unwrap();
+    }
+    // After drop, the cache directory should be gone.
+    assert!(
+        !cache_path.exists(),
+        "cache directory should have been removed on drop"
+    );
+}

From 4c3386cdfe786b55aadd00a7aaadc00e086b4e78 Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Mon, 16 Feb 2026 00:16:42 -0800
Subject: [PATCH 22/33] test: add LRU tracker core eviction tests

---
 tests/lru_correctness.rs | 63 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 tests/lru_correctness.rs

diff --git a/tests/lru_correctness.rs b/tests/lru_correctness.rs
new file mode 100644
index 0000000..d9bf5db
--- /dev/null
+++ b/tests/lru_correctness.rs
@@ -0,0 +1,63 @@
+#![allow(clippy::unwrap_used, clippy::similar_names, missing_docs)]
+
+mod common;
+
+use common::{MockCtx, MockDeleter, wait_for_culls};
+use git_fs::cache::eviction::lru::LruEvictionTracker;
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn evicts_least_recently_inserted() {
+    let deleter = MockDeleter::new();
+    let tracker = LruEvictionTracker::spawn(deleter.clone(), 64);
+
+    // Insert keys 1, 2, 3 in order. Key 1 is the oldest.
+    tracker.upsert(1, MockCtx { version: 1 });
+    tracker.upsert(2, MockCtx { version: 2 });
+    tracker.upsert(3, MockCtx { version: 3 });
+
+    // Let the worker process the upsert messages.
+    tokio::time::sleep(std::time::Duration::from_millis(50)).await;
+
+    // Evict 1 key — should be key 1 (least recently inserted).
+    assert!(tracker.try_cull(1), "try_cull should succeed");
+    wait_for_culls(&tracker).await;
+
+    let deleted = deleter.deleted_keys();
+    assert_eq!(deleted, vec![1], "key 1 should be evicted first");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn access_moves_key_to_back() {
+    let deleter = MockDeleter::new();
+    let tracker = LruEvictionTracker::spawn(deleter.clone(), 64);
+
+    tracker.upsert(1, MockCtx { version: 1 });
+    tracker.upsert(2, MockCtx { version: 2 });
+    tracker.upsert(3, MockCtx { version: 3 });
+
+    tokio::time::sleep(std::time::Duration::from_millis(50)).await;
+
+    // Access key 1, moving it to the back of the LRU queue.
+    tracker.access(1);
+
+    tokio::time::sleep(std::time::Duration::from_millis(50)).await;
+
+    // Evict 1 key — should now be key 2 (key 1 was refreshed).
+ assert!(tracker.try_cull(1)); + wait_for_culls(&tracker).await; + + let deleted = deleter.deleted_keys(); + assert_eq!(deleted, vec![2], "key 2 should be evicted since key 1 was accessed"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn try_cull_returns_true_on_success() { + let deleter = MockDeleter::new(); + let tracker = LruEvictionTracker::spawn(deleter, 64); + + tracker.upsert(1, MockCtx { version: 1 }); + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + + let result = tracker.try_cull(1); + assert!(result, "try_cull should return true when channel has space"); +} From 62d8bb67ce84f03fa953773dcd29571c0d1889dd Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Mon, 16 Feb 2026 00:18:35 -0800 Subject: [PATCH 23/33] test: add LRU tracker edge case tests --- tests/lru_correctness.rs | 90 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/tests/lru_correctness.rs b/tests/lru_correctness.rs index d9bf5db..1dfc505 100644 --- a/tests/lru_correctness.rs +++ b/tests/lru_correctness.rs @@ -61,3 +61,93 @@ async fn try_cull_returns_true_on_success() { let result = tracker.try_cull(1); assert!(result, "try_cull should return true when channel has space"); } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn have_pending_culls_reflects_state() { + let deleter = MockDeleter::new(); + let tracker = LruEvictionTracker::spawn(deleter, 64); + + // No culls requested yet. + assert!(!tracker.have_pending_culls()); + + tracker.upsert(1, MockCtx { version: 1 }); + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + + // Request a cull. + let _ = tracker.try_cull(1); + + // Immediately after try_cull, pending work should be true + // (unless the worker already processed it, which is unlikely but possible). + // Wait for completion and verify it clears. + wait_for_culls(&tracker).await; + assert!(!tracker.have_pending_culls()); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn stale_version_ignored() { + let deleter = MockDeleter::new(); + let tracker = LruEvictionTracker::spawn(deleter.clone(), 64); + + // Upsert key 1 with version 5, then send a stale version 3. + tracker.upsert(1, MockCtx { version: 5 }); + tracker.upsert(1, MockCtx { version: 3 }); + + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + + // Evict — the context should carry version 5 (stale version 3 was dropped). + assert!(tracker.try_cull(1)); + wait_for_culls(&tracker).await; + + let deleted = deleter.deleted.lock().unwrap(); + assert_eq!(deleted.len(), 1); + assert_eq!(deleted[0].1.version, 5, "should carry version 5, not stale version 3"); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn evict_more_than_available() { + let deleter = MockDeleter::new(); + let tracker = LruEvictionTracker::spawn(deleter.clone(), 64); + + tracker.upsert(1, MockCtx { version: 1 }); + tracker.upsert(2, MockCtx { version: 2 }); + + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + + // Request eviction of 100 keys but only 2 exist. 
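+    // The worker clamps the request to however many keys it actually holds, so the
+    // call still succeeds.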
+    assert!(tracker.try_cull(100));
+    wait_for_culls(&tracker).await;
+
+    let deleted = deleter.deleted_keys();
+    assert_eq!(deleted.len(), 2, "should only evict the 2 available keys");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn multiple_eviction_rounds() {
+    let deleter = MockDeleter::new();
+    let tracker = LruEvictionTracker::spawn(deleter.clone(), 64);
+
+    for i in 1u64..=6 {
+        tracker.upsert(i, MockCtx { version: i });
+    }
+
+    tokio::time::sleep(std::time::Duration::from_millis(50)).await;
+
+    // Round 1: evict 2
+    assert!(tracker.try_cull(2));
+    wait_for_culls(&tracker).await;
+
+    // Round 2: evict 2 more
+    assert!(tracker.try_cull(2));
+    wait_for_culls(&tracker).await;
+
+    let deleted = deleter.deleted_keys();
+    assert_eq!(deleted.len(), 4, "should have evicted 4 total across 2 rounds");
+    // First round evicts keys 1, 2; second round evicts keys 3, 4.
+    // Deletion tasks within a round may complete in any order, so sort each batch.
+    let mut round1: Vec<_> = deleted[..2].to_vec();
+    round1.sort_unstable();
+    let mut round2: Vec<_> = deleted[2..].to_vec();
+    round2.sort_unstable();
+    assert_eq!(round1, vec![1, 2]);
+    assert_eq!(round2, vec![3, 4]);
+}

From 6d1a11efc007b15c778821122968025134c195d2 Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Mon, 16 Feb 2026 00:20:38 -0800
Subject: [PATCH 24/33] test: add FileCache concurrency tests

---
 tests/fcache_concurrent.rs | 143 +++++++++++++++++++++++++++++++++++++
 1 file changed, 143 insertions(+)
 create mode 100644 tests/fcache_concurrent.rs

diff --git a/tests/fcache_concurrent.rs b/tests/fcache_concurrent.rs
new file mode 100644
index 0000000..5d704c3
--- /dev/null
+++ b/tests/fcache_concurrent.rs
@@ -0,0 +1,143 @@
+#![allow(clippy::unwrap_used, clippy::expect_used, missing_docs)]
+
+use std::sync::Arc;
+
+use git_fs::cache::fcache::FileCache;
+use git_fs::cache::traits::{AsyncReadableCache as _, AsyncWritableCache as _};
+use tokio::task::JoinSet;
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn concurrent_inserts_different_keys() {
+    let tmp = tempfile::tempdir().unwrap();
+    let cache = Arc::new(
+        FileCache::<u64>::new(tmp.path(), 1024 * 1024)
+            .await
+            .unwrap(),
+    );
+
+    let mut set = JoinSet::new();
+    for i in 0u64..100 {
+        let cache = Arc::clone(&cache);
+        set.spawn(async move {
+            cache
+                .insert(&i, format!("value-{i}").into_bytes())
+                .await
+                .unwrap();
+        });
+    }
+    while set.join_next().await.is_some() {}
+
+    // Every key should be present with the correct value.
+    for i in 0u64..100 {
+        let val = cache.get(&i).await;
+        assert_eq!(
+            val.as_deref(),
+            Some(format!("value-{i}").as_bytes()),
+            "key {i} missing or has wrong value"
+        );
+    }
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn concurrent_inserts_same_key() {
+    let tmp = tempfile::tempdir().unwrap();
+    let cache = Arc::new(
+        FileCache::<u64>::new(tmp.path(), 1024 * 1024)
+            .await
+            .unwrap(),
+    );
+
+    let mut set = JoinSet::new();
+    for i in 0u64..50 {
+        let cache = Arc::clone(&cache);
+        set.spawn(async move {
+            cache
+                .insert(&0, format!("value-{i}").into_bytes())
+                .await
+                .unwrap();
+        });
+    }
+    while set.join_next().await.is_some() {}
+
+    // The final value must be one of the inserted values (last writer wins).
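+    // Which of the 50 writers wins is scheduler-dependent; the invariant under test is
+    // atomicity: no torn or interleaved value.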
+    let val = cache.get(&0).await;
+    assert!(val.is_some(), "key 0 should still be present");
+
+    let val_str = String::from_utf8(val.unwrap()).unwrap();
+    assert!(
+        val_str.starts_with("value-"),
+        "value should be one of the inserted values, got: {val_str}"
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn concurrent_reads_during_writes() {
+    let tmp = tempfile::tempdir().unwrap();
+    let cache = Arc::new(
+        FileCache::<u64>::new(tmp.path(), 1024 * 1024)
+            .await
+            .unwrap(),
+    );
+
+    // Pre-populate with initial value.
+    cache.insert(&0, b"initial".to_vec()).await.unwrap();
+
+    let mut set = JoinSet::new();
+
+    // Spawn 20 writers overwriting key 0.
+    for i in 0u64..20 {
+        let cache = Arc::clone(&cache);
+        set.spawn(async move {
+            cache
+                .insert(&0, format!("write-{i}").into_bytes())
+                .await
+                .unwrap();
+        });
+    }
+
+    // Spawn 20 readers -- each get() should return None or valid UTF-8.
+    for _ in 0..20 {
+        let cache = Arc::clone(&cache);
+        set.spawn(async move {
+            if let Some(val) = cache.get(&0).await {
+                String::from_utf8(val)
+                    .expect("concurrent read returned corrupted (non-UTF-8) data");
+            }
+        });
+    }
+
+    while set.join_next().await.is_some() {}
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn concurrent_inserts_with_eviction() {
+    // Small cache: 200 bytes. 50 tasks each insert 20-byte values.
+    let tmp = tempfile::tempdir().unwrap();
+    let cache = Arc::new(FileCache::<u64>::new(tmp.path(), 200).await.unwrap());
+
+    let mut set = JoinSet::new();
+    for i in 0u64..50 {
+        let cache = Arc::clone(&cache);
+        set.spawn(async move {
+            cache.insert(&i, vec![b'x'; 20]).await.unwrap();
+        });
+    }
+    while set.join_next().await.is_some() {}
+
+    // Give eviction time to settle.
+    tokio::time::sleep(std::time::Duration::from_millis(200)).await;
+
+    // Not all keys can fit (50 x 20 = 1000 > 200). Some must have been evicted.
+    let mut present = 0usize;
+    for i in 0u64..50 {
+        if cache.get(&i).await.is_some() {
+            present += 1;
+        }
+    }
+
+    assert!(
+        present < 50,
+        "expected some evictions under pressure, but all 50 entries survived"
+    );
+    assert!(present > 0, "at least some entries should still be present");
+}

From 4b6a2bb4a8d1031c700ad239a1aa559a288e5d5f Mon Sep 17 00:00:00 2001
From: Marko Vejnovic
Date: Mon, 16 Feb 2026 00:22:19 -0800
Subject: [PATCH 25/33] test: add LRU tracker concurrency tests

---
 tests/lru_concurrent.rs | 143 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 143 insertions(+)
 create mode 100644 tests/lru_concurrent.rs

diff --git a/tests/lru_concurrent.rs b/tests/lru_concurrent.rs
new file mode 100644
index 0000000..2fea3b8
--- /dev/null
+++ b/tests/lru_concurrent.rs
@@ -0,0 +1,143 @@
+#![allow(clippy::unwrap_used, clippy::similar_names, missing_docs)]
+
+mod common;
+
+use std::sync::Arc;
+use std::time::Duration;
+
+use common::{MockCtx, MockDeleter, wait_for_culls};
+use git_fs::cache::eviction::lru::LruEvictionTracker;
+use tokio::task::JoinSet;
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn concurrent_upserts() {
+    let deleter = MockDeleter::new();
+    let tracker = Arc::new(LruEvictionTracker::spawn(deleter.clone(), 256));
+
+    let mut set = JoinSet::new();
+    for i in 0u64..100 {
+        let tracker = Arc::clone(&tracker);
+        set.spawn(async move {
+            tracker.upsert(i, MockCtx { version: i });
+        });
+    }
+    while set.join_next().await.is_some() {}
+
+    // Let the worker process all messages.
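+    // (upsert() is fire-and-forget over the tracker's channel, so give the detached
+    // worker a moment to drain it.)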
+    tokio::time::sleep(Duration::from_millis(100)).await;
+
+    // Evict all 100 — every key should appear in the deletion list exactly once.
+    assert!(tracker.try_cull(100));
+    wait_for_culls(&tracker).await;
+
+    let mut deleted = deleter.deleted_keys();
+    deleted.sort_unstable();
+    let expected: Vec<u64> = (0..100).collect();
+    assert_eq!(deleted, expected, "all 100 keys should have been evicted");
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn concurrent_access_notifications() {
+    let deleter = MockDeleter::new();
+    let tracker = Arc::new(LruEvictionTracker::spawn(deleter.clone(), 256));
+
+    // Insert keys 1..=10
+    for i in 1u64..=10 {
+        tracker.upsert(i, MockCtx { version: i });
+    }
+    tokio::time::sleep(Duration::from_millis(50)).await;
+
+    // Concurrently access keys 1..=5, making them "recently used."
+    let mut set = JoinSet::new();
+    for i in 1u64..=5 {
+        let tracker = Arc::clone(&tracker);
+        set.spawn(async move {
+            for _ in 0..10 {
+                tracker.access(i);
+            }
+        });
+    }
+    while set.join_next().await.is_some() {}
+    tokio::time::sleep(Duration::from_millis(50)).await;
+
+    // Evict 5 keys — keys 6..=10 should be evicted (they were never accessed).
+    assert!(tracker.try_cull(5));
+    wait_for_culls(&tracker).await;
+
+    let mut deleted = deleter.deleted_keys();
+    deleted.sort_unstable();
+    assert_eq!(
+        deleted,
+        vec![6, 7, 8, 9, 10],
+        "non-accessed keys 6..=10 should be evicted first"
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn concurrent_cull_requests() {
+    let deleter = MockDeleter::new();
+    let tracker = Arc::new(LruEvictionTracker::spawn(deleter.clone(), 256));
+
+    for i in 0u64..20 {
+        tracker.upsert(i, MockCtx { version: i });
+    }
+    tokio::time::sleep(Duration::from_millis(50)).await;
+
+    // Fire 10 concurrent cull requests, each for 2 keys.
+    let mut set = JoinSet::new();
+    for _ in 0..10 {
+        let tracker = Arc::clone(&tracker);
+        set.spawn(async move {
+            let _ = tracker.try_cull(2);
+        });
+    }
+    while set.join_next().await.is_some() {}
+
+    // Wait for all culls to settle.
+    wait_for_culls(&tracker).await;
+
+    // Some keys should have been evicted (exact count depends on channel capacity
+    // and scheduling, but at least some culls should succeed).
+    let deleted = deleter.deleted_keys();
+    assert!(
+        !deleted.is_empty(),
+        "at least some evictions should have occurred"
+    );
+    assert!(
+        deleted.len() <= 20,
+        "cannot evict more keys than exist"
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn high_throughput_message_storm() {
+    let deleter = MockDeleter::new();
+    // Small channel to stress the try_send → spawn fallback path.
+    let tracker = Arc::new(LruEvictionTracker::spawn(deleter.clone(), 4));
+
+    let mut set = JoinSet::new();
+
+    // 50 tasks each upsert and access rapidly.
+    for i in 0u64..50 {
+        let tracker = Arc::clone(&tracker);
+        set.spawn(async move {
+            tracker.upsert(i, MockCtx { version: i });
+            tracker.access(i);
+            tracker.upsert(i, MockCtx { version: i + 100 });
+        });
+    }
+    while set.join_next().await.is_some() {}
+
+    // Let messages drain.
+    tokio::time::sleep(Duration::from_millis(200)).await;
+
+    // Evict everything.
+    assert!(tracker.try_cull(50));
+    wait_for_culls(&tracker).await;
+
+    // All 50 unique keys should have been evicted exactly once.
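+    // (dedup() below tolerates a benign duplicate delivery; the strict requirement is
+    // that every key appears at least once.)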
+ let mut deleted = deleter.deleted_keys(); + deleted.sort_unstable(); + deleted.dedup(); + assert_eq!(deleted.len(), 50, "all 50 unique keys should be evicted"); +} From f1bcf0ee0dbe65b51850180365bdcda8b94b15bc Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Mon, 16 Feb 2026 00:26:23 -0800 Subject: [PATCH 26/33] =?UTF-8?q?test:=20address=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20add=20try=5Fcull=20false=20test,=20increase=20sleep?= =?UTF-8?q?=20durations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/lru_concurrent.rs | 15 +++++------- tests/lru_correctness.rs | 53 +++++++++++++++++++++++++++++++--------- 2 files changed, 48 insertions(+), 20 deletions(-) diff --git a/tests/lru_concurrent.rs b/tests/lru_concurrent.rs index 2fea3b8..247a863 100644 --- a/tests/lru_concurrent.rs +++ b/tests/lru_concurrent.rs @@ -45,7 +45,7 @@ async fn concurrent_access_notifications() { for i in 1u64..=10 { tracker.upsert(i, MockCtx { version: i }); } - tokio::time::sleep(Duration::from_millis(50)).await; + tokio::time::sleep(Duration::from_millis(100)).await; // Concurrently access keys 1..=5, making them "recently used." let mut set = JoinSet::new(); @@ -58,7 +58,7 @@ async fn concurrent_access_notifications() { }); } while set.join_next().await.is_some() {} - tokio::time::sleep(Duration::from_millis(50)).await; + tokio::time::sleep(Duration::from_millis(100)).await; // Evict 5 keys — keys 6..=10 should be evicted (they were never accessed). assert!(tracker.try_cull(5)); @@ -81,7 +81,7 @@ async fn concurrent_cull_requests() { for i in 0u64..20 { tracker.upsert(i, MockCtx { version: i }); } - tokio::time::sleep(Duration::from_millis(50)).await; + tokio::time::sleep(Duration::from_millis(100)).await; // Fire 10 concurrent cull requests, each for 2 keys. let mut set = JoinSet::new(); @@ -100,13 +100,10 @@ async fn concurrent_cull_requests() { // and scheduling, but at least some culls should succeed). let deleted = deleter.deleted_keys(); assert!( - !deleted.is_empty(), - "at least some evictions should have occurred" - ); - assert!( - deleted.len() <= 20, - "cannot evict more keys than exist" + deleted.len() >= 2, + "at least one cull request (2 keys) should have succeeded" ); + assert!(deleted.len() <= 20, "cannot evict more keys than exist"); } #[tokio::test(flavor = "multi_thread", worker_threads = 4)] diff --git a/tests/lru_correctness.rs b/tests/lru_correctness.rs index 1dfc505..38cc02a 100644 --- a/tests/lru_correctness.rs +++ b/tests/lru_correctness.rs @@ -16,7 +16,7 @@ async fn evicts_least_recently_inserted() { tracker.upsert(3, MockCtx { version: 3 }); // Let the worker process the upsert messages. - tokio::time::sleep(std::time::Duration::from_millis(50)).await; + tokio::time::sleep(std::time::Duration::from_millis(100)).await; // Evict 1 key — should be key 1 (least recently inserted). assert!(tracker.try_cull(1), "try_cull should succeed"); @@ -35,19 +35,23 @@ async fn access_moves_key_to_back() { tracker.upsert(2, MockCtx { version: 2 }); tracker.upsert(3, MockCtx { version: 3 }); - tokio::time::sleep(std::time::Duration::from_millis(50)).await; + tokio::time::sleep(std::time::Duration::from_millis(100)).await; // Access key 1, moving it to the back of the LRU queue. tracker.access(1); - tokio::time::sleep(std::time::Duration::from_millis(50)).await; + tokio::time::sleep(std::time::Duration::from_millis(100)).await; // Evict 1 key — should now be key 2 (key 1 was refreshed). 
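+    //
+    // NOTE (illustrative aside, not part of this patch's changes): the
+    // refresh-on-access behaviour under test is the ordered-set idiom that
+    // hashlink (the crate this series pulls in) makes easy: removing and
+    // re-inserting a live key moves it to the back of the order, e.g.:
+    //
+    //     use hashlink::LinkedHashSet;
+    //     let mut set = LinkedHashSet::new();
+    //     set.insert(1); set.insert(2); set.insert(3);
+    //     set.remove(&1);
+    //     set.insert(1);                              // key 1 refreshed
+    //     assert_eq!(set.iter().next(), Some(&2));    // key 2 is now the LRU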
assert!(tracker.try_cull(1)); wait_for_culls(&tracker).await; let deleted = deleter.deleted_keys(); - assert_eq!(deleted, vec![2], "key 2 should be evicted since key 1 was accessed"); + assert_eq!( + deleted, + vec![2], + "key 2 should be evicted since key 1 was accessed" + ); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -56,7 +60,7 @@ async fn try_cull_returns_true_on_success() { let tracker = LruEvictionTracker::spawn(deleter, 64); tracker.upsert(1, MockCtx { version: 1 }); - tokio::time::sleep(std::time::Duration::from_millis(50)).await; + tokio::time::sleep(std::time::Duration::from_millis(100)).await; let result = tracker.try_cull(1); assert!(result, "try_cull should return true when channel has space"); @@ -71,7 +75,7 @@ async fn have_pending_culls_reflects_state() { assert!(!tracker.have_pending_culls()); tracker.upsert(1, MockCtx { version: 1 }); - tokio::time::sleep(std::time::Duration::from_millis(50)).await; + tokio::time::sleep(std::time::Duration::from_millis(100)).await; // Request a cull. let _ = tracker.try_cull(1); @@ -92,7 +96,7 @@ async fn stale_version_ignored() { tracker.upsert(1, MockCtx { version: 5 }); tracker.upsert(1, MockCtx { version: 3 }); - tokio::time::sleep(std::time::Duration::from_millis(50)).await; + tokio::time::sleep(std::time::Duration::from_millis(100)).await; // Evict — the context should carry version 5 (stale version 3 was dropped). assert!(tracker.try_cull(1)); @@ -100,7 +104,10 @@ async fn stale_version_ignored() { let deleted = deleter.deleted.lock().unwrap(); assert_eq!(deleted.len(), 1); - assert_eq!(deleted[0].1.version, 5, "should carry version 5, not stale version 3"); + assert_eq!( + deleted[0].1.version, 5, + "should carry version 5, not stale version 3" + ); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] @@ -111,7 +118,7 @@ async fn evict_more_than_available() { tracker.upsert(1, MockCtx { version: 1 }); tracker.upsert(2, MockCtx { version: 2 }); - tokio::time::sleep(std::time::Duration::from_millis(50)).await; + tokio::time::sleep(std::time::Duration::from_millis(100)).await; // Request eviction of 100 keys but only 2 exist. assert!(tracker.try_cull(100)); @@ -130,7 +137,7 @@ async fn multiple_eviction_rounds() { tracker.upsert(i, MockCtx { version: i }); } - tokio::time::sleep(std::time::Duration::from_millis(50)).await; + tokio::time::sleep(std::time::Duration::from_millis(100)).await; // Round 1: evict 2 assert!(tracker.try_cull(2)); @@ -141,7 +148,11 @@ async fn multiple_eviction_rounds() { wait_for_culls(&tracker).await; let deleted = deleter.deleted_keys(); - assert_eq!(deleted.len(), 4, "should have evicted 4 total across 2 rounds"); + assert_eq!( + deleted.len(), + 4, + "should have evicted 4 total across 2 rounds" + ); // First round evicts keys 1, 2; second round evicts keys 3, 4. // Deletion tasks within a round may complete in any order, so sort each batch. let mut round1: Vec<_> = deleted[..2].to_vec(); @@ -151,3 +162,23 @@ async fn multiple_eviction_rounds() { assert_eq!(round1, vec![1, 2]); assert_eq!(round2, vec![3, 4]); } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn try_cull_returns_false_when_channel_full() { + let deleter = MockDeleter::new(); + // Channel size 1 — fills immediately. + let tracker = LruEvictionTracker::spawn(deleter, 1); + + tracker.upsert(1, MockCtx { version: 1 }); + tokio::time::sleep(std::time::Duration::from_millis(100)).await; + + // First cull takes the only channel slot. 
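+    //
+    // NOTE (illustrative aside, not part of this patch's changes): try_cull
+    // presumably fails fast via a bounded-channel try_send rather than
+    // awaiting capacity; the same pattern in isolation:
+    //
+    //     let (tx, _rx) = tokio::sync::mpsc::channel::<u32>(1);
+    //     assert!(tx.try_send(1).is_ok());    // takes the only slot
+    //     assert!(tx.try_send(2).is_err());   // TrySendError::Full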
+ assert!(tracker.try_cull(1), "first try_cull should succeed"); + + // Second cull should fail — channel is full. + assert!(!tracker.try_cull(1), "second try_cull should fail when channel is full"); + + // After the worker drains, pending culls should eventually clear. + wait_for_culls(&tracker).await; + assert!(!tracker.have_pending_culls()); +} From 4012b78ae811b23da196d9d0f196be3286defd42 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Mon, 16 Feb 2026 00:28:07 -0800 Subject: [PATCH 27/33] =?UTF-8?q?test:=20address=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20fix=20flaky=20eviction=20test,=20add=20try=5Fcull?= =?UTF-8?q?=20false=20test,=20increase=20sleep=20durations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix concurrent_inserts_with_eviction: add sequential warmup so LRU worker has eviction candidates before concurrent inserts trigger eviction (fixes flakiness where all entries survived) - Add try_cull_returns_false_when_channel_full test - Increase sleep durations from 50ms to 100ms in LRU tests - Tighten concurrent_cull_requests lower bound assertion --- tests/fcache_concurrent.rs | 14 ++++++++++++-- tests/lru_correctness.rs | 5 ++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/tests/fcache_concurrent.rs b/tests/fcache_concurrent.rs index 5d704c3..79426f1 100644 --- a/tests/fcache_concurrent.rs +++ b/tests/fcache_concurrent.rs @@ -111,12 +111,22 @@ async fn concurrent_reads_during_writes() { #[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn concurrent_inserts_with_eviction() { - // Small cache: 200 bytes. 50 tasks each insert 20-byte values. + // Small cache: 200 bytes. Sequential warmup fills the LRU worker's map, + // then concurrent inserts trigger eviction. let tmp = tempfile::tempdir().unwrap(); let cache = Arc::new(FileCache::::new(tmp.path(), 200).await.unwrap()); + // Warmup: insert 10 entries sequentially so the LRU worker registers them + // in its ordered map. This ensures eviction has candidates to evict. + for i in 0u64..10 { + cache.insert(&i, vec![b'x'; 20]).await.unwrap(); + } + tokio::time::sleep(std::time::Duration::from_millis(100)).await; + + // Concurrent phase: 40 more inserts push the cache well over the 200-byte + // limit and trigger eviction against the warmed-up LRU map. let mut set = JoinSet::new(); - for i in 0u64..50 { + for i in 10u64..50 { let cache = Arc::clone(&cache); set.spawn(async move { cache.insert(&i, vec![b'x'; 20]).await.unwrap(); diff --git a/tests/lru_correctness.rs b/tests/lru_correctness.rs index 38cc02a..e9a7bf1 100644 --- a/tests/lru_correctness.rs +++ b/tests/lru_correctness.rs @@ -176,7 +176,10 @@ async fn try_cull_returns_false_when_channel_full() { assert!(tracker.try_cull(1), "first try_cull should succeed"); // Second cull should fail — channel is full. - assert!(!tracker.try_cull(1), "second try_cull should fail when channel is full"); + assert!( + !tracker.try_cull(1), + "second try_cull should fail when channel is full" + ); // After the worker drains, pending culls should eventually clear. 
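+    //
+    // NOTE (aside, not part of this patch's changes): wait_for_culls comes
+    // from tests/common and is not shown in this series; a plausible shape,
+    // assuming it simply polls the tracker's pending flag:
+    //
+    //     // Generic bounds elided for brevity.
+    //     pub async fn wait_for_culls(tracker: &LruEvictionTracker</* ... */>) {
+    //         while tracker.have_pending_culls() {
+    //             tokio::time::sleep(std::time::Duration::from_millis(10)).await;
+    //         }
+    //     }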
wait_for_culls(&tracker).await; From 23000b63d68ca36095824c8abe33b4b6bc44aff3 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Mon, 16 Feb 2026 00:30:27 -0800 Subject: [PATCH 28/33] more tests --- tests/common/mod.rs | 7 ++++++- tests/fcache_correctness.rs | 10 ++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/common/mod.rs b/tests/common/mod.rs index d7d0cea..101f929 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -32,7 +32,12 @@ impl MockDeleter { /// Returns just the keys that were deleted, in deletion order. pub fn deleted_keys(&self) -> Vec { - self.deleted.lock().unwrap().iter().map(|(k, _)| *k).collect() + self.deleted + .lock() + .unwrap() + .iter() + .map(|(k, _)| *k) + .collect() } } diff --git a/tests/fcache_correctness.rs b/tests/fcache_correctness.rs index df6e2f3..4731258 100644 --- a/tests/fcache_correctness.rs +++ b/tests/fcache_correctness.rs @@ -33,7 +33,10 @@ async fn new_rejects_file_path() { assert!( matches!(result, Err(InvalidRootPathError::NotADirectory(_))), "expected NotADirectory, got {:?}", - result.as_ref().map(|_| "Ok(...)").map_err(|e| format!("{e:?}")) + result + .as_ref() + .map(|_| "Ok(...)") + .map_err(|e| format!("{e:?}")) ); } @@ -48,7 +51,10 @@ async fn new_rejects_non_empty_unmarked_directory() { assert!( matches!(result, Err(InvalidRootPathError::RootPathUnsafeCache(_))), "expected RootPathUnsafeCache, got {:?}", - result.as_ref().map(|_| "Ok(...)").map_err(|e| format!("{e:?}")) + result + .as_ref() + .map(|_| "Ok(...)") + .map_err(|e| format!("{e:?}")) ); } From 304389258637e5c329d706017c1297c5c66241b0 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Mon, 16 Feb 2026 00:35:15 -0800 Subject: [PATCH 29/33] fix: add lib/ directory to Docker build context The Dockerfile only copied src/ but the library crate lives at lib/lib.rs, causing the Docker build to fail with "couldn't read lib/lib.rs". 
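For reference, with this change the build-context section of
tests/docker/Dockerfile ends up as below (the diff only adds the
`COPY lib/ lib/` line):

    WORKDIR /src
    COPY Cargo.toml Cargo.lock build.rs ./
    COPY .git/ .git/
    COPY lib/ lib/
    COPY src/ src/
    RUN cargo build --release

A quick local check is `docker build -f tests/docker/Dockerfile .` from the
repository root; before this fix the build failed at the `cargo build` step.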
--- tests/docker/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/docker/Dockerfile b/tests/docker/Dockerfile index c4f6c61..9821acc 100644 --- a/tests/docker/Dockerfile +++ b/tests/docker/Dockerfile @@ -9,6 +9,7 @@ RUN apt-get update && apt-get install -y \ WORKDIR /src COPY Cargo.toml Cargo.lock build.rs ./ COPY .git/ .git/ +COPY lib/ lib/ COPY src/ src/ RUN cargo build --release From 1e0df5d48d1435cc580d209a143543c0f6eb82f9 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Mon, 16 Feb 2026 01:03:53 -0800 Subject: [PATCH 30/33] feat: thread CacheConfig to RepoFs and create FileCache per repo --- src/daemon.rs | 2 +- src/fs/mescloud/mod.rs | 9 +++++++-- src/fs/mescloud/org.rs | 14 ++++++++++++-- src/fs/mescloud/repo.rs | 25 ++++++++++++++++++++++++- 4 files changed, 44 insertions(+), 6 deletions(-) diff --git a/src/daemon.rs b/src/daemon.rs index 0e70229..dac2d05 100644 --- a/src/daemon.rs +++ b/src/daemon.rs @@ -43,7 +43,7 @@ mod managed_fuse { name: org_name.clone(), api_key: org.api_key.clone(), }); - let mesa_fs = MesaFS::new(orgs, (config.uid, config.gid)); + let mesa_fs = MesaFS::new(orgs, (config.uid, config.gid), &config.cache); let fuse_adapter = FuserAdapter::new(mesa_fs, handle); let mount_opts = [ fuser::MountOption::FSName("git-fs".to_owned()), diff --git a/src/fs/mescloud/mod.rs b/src/fs/mescloud/mod.rs index a3ce17d..1a3cce8 100644 --- a/src/fs/mescloud/mod.rs +++ b/src/fs/mescloud/mod.rs @@ -10,6 +10,7 @@ use secrecy::ExposeSecret as _; use tracing::{Instrument as _, instrument, trace, warn}; use tracing_opentelemetry::OpenTelemetrySpanExt as _; +use crate::app_config::CacheConfig; use crate::fs::icache::bridge::HashMapBridge; use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; use crate::fs::r#trait::{ @@ -154,7 +155,11 @@ impl MesaFS { /// Create a new `MesaFS` instance. #[must_use] - pub fn new(orgs: impl Iterator, fs_owner: (u32, u32)) -> Self { + pub fn new( + orgs: impl Iterator, + fs_owner: (u32, u32), + cache: &CacheConfig, + ) -> Self { let resolver = MesaResolver { fs_owner, block_size: Self::BLOCK_SIZE, @@ -174,7 +179,7 @@ impl MesaFS { slots: orgs .map(|org_conf| { let client = build_mesa_client(org_conf.api_key.expose_secret()); - let org = OrgFs::new(org_conf.name, client, fs_owner); + let org = OrgFs::new(org_conf.name, client, fs_owner, cache.clone()); ChildSlot { inner: org, bridge: HashMapBridge::new(), diff --git a/src/fs/mescloud/org.rs b/src/fs/mescloud/org.rs index 968c748..f77aeb4 100644 --- a/src/fs/mescloud/org.rs +++ b/src/fs/mescloud/org.rs @@ -17,6 +17,7 @@ use super::composite::{ChildSlot, CompositeFs}; use super::icache as mescloud_icache; use super::icache::MescloudICache; use super::repo::RepoFs; +use crate::app_config::CacheConfig; use crate::fs::icache::bridge::HashMapBridge; use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; use crate::fs::r#trait::{ @@ -94,6 +95,7 @@ pub struct OrgFs { composite: CompositeFs, /// Maps org-level owner-dir inodes to owner name (github only). 
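+    //
+    // NOTE (aside, not part of this patch's changes): cache_config is stored
+    // on OrgFs only so it can be cloned into each RepoFs, which roots its
+    // FileCache at <cache path>/<org>/<repo> -- see RepoFs::new in the
+    // repo.rs hunk below.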
owner_inodes: HashMap, + cache_config: CacheConfig, } impl OrgFs { @@ -186,7 +188,12 @@ impl OrgFs { } #[must_use] - pub fn new(name: String, client: MesaClient, fs_owner: (u32, u32)) -> Self { + pub fn new( + name: String, + client: MesaClient, + fs_owner: (u32, u32), + cache_config: CacheConfig, + ) -> Self { let resolver = OrgResolver { fs_owner, block_size: Self::BLOCK_SIZE, @@ -203,6 +210,7 @@ impl OrgFs { slots: Vec::new(), }, owner_inodes: HashMap::new(), + cache_config, } } @@ -296,7 +304,9 @@ impl OrgFs { repo_name.to_owned(), default_branch.to_owned(), self.composite.icache.fs_owner(), - ); + self.cache_config.clone(), + ) + .await; let mut bridge = HashMapBridge::new(); bridge.insert_inode(ino, RepoFs::ROOT_INO); diff --git a/src/fs/mescloud/repo.rs b/src/fs/mescloud/repo.rs index 0d22196..a05c98a 100644 --- a/src/fs/mescloud/repo.rs +++ b/src/fs/mescloud/repo.rs @@ -12,6 +12,9 @@ use mesa_dev::low_level::content::{Content, DirEntry as MesaDirEntry}; use num_traits::cast::ToPrimitive as _; use tracing::{Instrument as _, instrument, trace, warn}; +use git_fs::cache::fcache::FileCache; + +use crate::app_config::CacheConfig; use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; use crate::fs::r#trait::{ DirEntry, DirEntryType, FileAttr, FileHandle, FileOpenOptions, FilesystemStats, Fs, Inode, @@ -183,6 +186,8 @@ pub struct RepoFs { file_table: FileTable, readdir_buf: Vec, open_files: HashMap, + #[expect(dead_code, reason = "will be used in RepoFs::read cache integration")] + file_cache: Option>, } impl RepoFs { @@ -190,12 +195,13 @@ impl RepoFs { const BLOCK_SIZE: u32 = 4096; /// Create a new `RepoFs` for a specific org and repo. - pub fn new( + pub async fn new( client: MesaClient, org_name: String, repo_name: String, ref_: String, fs_owner: (u32, u32), + cache_config: CacheConfig, ) -> Self { let resolver = RepoResolver { client: client.clone(), @@ -205,6 +211,22 @@ impl RepoFs { fs_owner, block_size: Self::BLOCK_SIZE, }; + + let file_cache = match cache_config.max_size { + Some(max_size) if max_size.as_u64() > 0 => { + let cache_dir = cache_config.path.join(&org_name).join(&repo_name); + let max_bytes = max_size.as_u64().try_into().unwrap_or(usize::MAX); + match FileCache::new(&cache_dir, max_bytes).await { + Ok(cache) => Some(cache), + Err(e) => { + warn!(error = ?e, "failed to create file cache, continuing without caching"); + None + } + } + } + _ => None, + }; + Self { client, org_name, @@ -214,6 +236,7 @@ impl RepoFs { file_table: FileTable::new(), readdir_buf: Vec::new(), open_files: HashMap::new(), + file_cache, } } From 4b16c597af2c4647da93ae0b320abc292b6e4623 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Mon, 16 Feb 2026 01:09:47 -0800 Subject: [PATCH 31/33] feat: cache decoded file content in RepoFs::read --- src/fs/mescloud/repo.rs | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/src/fs/mescloud/repo.rs b/src/fs/mescloud/repo.rs index a05c98a..2333cac 100644 --- a/src/fs/mescloud/repo.rs +++ b/src/fs/mescloud/repo.rs @@ -13,6 +13,7 @@ use num_traits::cast::ToPrimitive as _; use tracing::{Instrument as _, instrument, trace, warn}; use git_fs::cache::fcache::FileCache; +use git_fs::cache::traits::{AsyncReadableCache as _, AsyncWritableCache as _}; use crate::app_config::CacheConfig; use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; @@ -186,7 +187,6 @@ pub struct RepoFs { file_table: FileTable, readdir_buf: Vec, open_files: HashMap, - #[expect(dead_code, reason = "will be used in 
RepoFs::read cache integration")] file_cache: Option>, } @@ -219,7 +219,7 @@ impl RepoFs { match FileCache::new(&cache_dir, max_bytes).await { Ok(cache) => Some(cache), Err(e) => { - warn!(error = ?e, "failed to create file cache, continuing without caching"); + warn!(error = ?e, org = %org_name, repo = %repo_name, "failed to create file cache, continuing without caching"); None } } @@ -442,9 +442,29 @@ impl Fs for RepoFs { "read: inode {ino} has non-file cached attr" ); + // Try the file cache first. + if let Some(cache) = &self.file_cache + && let Some(data) = cache.get(&ino).await + { + let start = usize::try_from(offset) + .unwrap_or(data.len()) + .min(data.len()); + let end = start.saturating_add(size as usize).min(data.len()); + trace!( + ino, + fh, + cached = true, + decoded_len = data.len(), + start, + end, + "read content" + ); + return Ok(Bytes::copy_from_slice(&data[start..end])); + } + + // Cache miss — fetch from the Mesa API. let file_path = self.path_of_inode(ino).await; - // Non-root inodes must have a resolvable path. if ino != Self::ROOT_INO && file_path.is_none() { warn!(ino, "read: path_of_inode returned None for non-root inode"); return Err(ReadError::InodeNotFound); @@ -474,8 +494,17 @@ impl Fs for RepoFs { .unwrap_or(decoded.len()) .min(decoded.len()); let end = start.saturating_add(size as usize).min(decoded.len()); - trace!(ino, fh, path = ?file_path, decoded_len = decoded.len(), start, end, "read content"); - Ok(Bytes::copy_from_slice(&decoded[start..end])) + let result = Bytes::copy_from_slice(&decoded[start..end]); + trace!(ino, fh, cached = false, path = ?file_path, decoded_len = decoded.len(), start, end, "read content"); + + // Store the decoded content in the cache for future reads. + if let Some(cache) = &self.file_cache + && let Err(e) = cache.insert(&ino, decoded).await + { + warn!(error = ?e, ino, "failed to cache file content"); + } + + Ok(result) } #[instrument(name = "RepoFs::release", skip(self), fields(repo = %self.repo_name))] From 4ee5a8519a9bbbd1ade370ca416b3a32fc723c86 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Mon, 16 Feb 2026 19:49:18 -0800 Subject: [PATCH 32/33] minor lint --- Cargo.lock | 188 ++++++---------------------------------- Cargo.toml | 2 +- src/fs/mescloud/org.rs | 2 + src/fs/mescloud/repo.rs | 3 +- 4 files changed, 31 insertions(+), 164 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1002ac9..d4de999 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -283,9 +283,9 @@ dependencies = [ [[package]] name = "core-foundation" -version = "0.9.4" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" dependencies = [ "core-foundation-sys", "libc", @@ -525,15 +525,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" -[[package]] -name = "encoding_rs" -version = "0.8.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" -dependencies = [ - "cfg-if", -] - [[package]] name = "equivalent" version = "1.0.2" @@ -580,21 +571,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" -[[package]] -name = "foreign-types" -version = "0.3.2" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" -dependencies = [ - "foreign-types-shared", -] - -[[package]] -name = "foreign-types-shared" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" - [[package]] name = "form_urlencoded" version = "1.2.2" @@ -947,6 +923,7 @@ dependencies = [ "hyper", "hyper-util", "rustls", + "rustls-native-certs", "rustls-pki-types", "tokio", "tokio-rustls", @@ -954,22 +931,6 @@ dependencies = [ "webpki-roots", ] -[[package]] -name = "hyper-tls" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" -dependencies = [ - "bytes", - "http-body-util", - "hyper", - "hyper-util", - "native-tls", - "tokio", - "tokio-native-tls", - "tower-service", -] - [[package]] name = "hyper-util" version = "0.1.20" @@ -988,11 +949,9 @@ dependencies = [ "percent-encoding", "pin-project-lite", "socket2", - "system-configuration", "tokio", "tower-service", "tracing", - "windows-registry", ] [[package]] @@ -1328,9 +1287,9 @@ checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "mesa-dev" -version = "1.11.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0685415ca22ab4f72a6a3f0f720fcca0c69d295d2a95a40cd3ce92555103d3a1" +checksum = "92979030f29144d605a5f077fe21502b5397b30af92da92d2ca3d11051c8ff6c" dependencies = [ "async-stream", "futures-core", @@ -1345,9 +1304,9 @@ dependencies = [ [[package]] name = "mesa_dev_oapi" -version = "1.8.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "266585ec1d9950343ba2a307909003c05342149c4bd0cf5b224591b2621ccfd4" +checksum = "4456b424726e1dbf9e3ac4763438eda69939458a6cd50ca06dd65d7333b75ff1" dependencies = [ "reqwest", "reqwest-middleware", @@ -1387,23 +1346,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "native-tls" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87de3442987e9dbec73158d5c715e7ad9072fda936bb03d19d7fa10e00520f0e" -dependencies = [ - "libc", - "log", - "openssl", - "openssl-probe", - "openssl-sys", - "schannel", - "security-framework", - "security-framework-sys", - "tempfile", -] - [[package]] name = "nix" version = "0.29.0" @@ -1467,49 +1409,11 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" -[[package]] -name = "openssl" -version = "0.10.75" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" -dependencies = [ - "bitflags", - "cfg-if", - "foreign-types", - "libc", - "once_cell", - "openssl-macros", - "openssl-sys", -] - -[[package]] -name = "openssl-macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "openssl-probe" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" - -[[package]] -name = 
"openssl-sys" -version = "0.9.111" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" [[package]] name = "opentelemetry" @@ -1928,7 +1832,6 @@ checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ "base64", "bytes", - "encoding_rs", "futures-channel", "futures-core", "futures-util", @@ -1938,24 +1841,21 @@ dependencies = [ "http-body-util", "hyper", "hyper-rustls", - "hyper-tls", "hyper-util", "js-sys", "log", - "mime", "mime_guess", - "native-tls", "percent-encoding", "pin-project-lite", "quinn", "rustls", + "rustls-native-certs", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-native-tls", "tokio-rustls", "tower", "tower-http", @@ -2038,6 +1938,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pki-types" version = "1.14.0" @@ -2144,9 +2056,9 @@ dependencies = [ [[package]] name = "security-framework" -version = "2.11.1" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +checksum = "d17b898a6d6948c3a8ee4372c17cb384f90d2e6e912ef00895b14fd7ab54ec38" dependencies = [ "bitflags", "core-foundation", @@ -2157,9 +2069,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.15.0" +version = "2.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" +checksum = "321c8673b092a9a42605034a9879d73cb79101ed5fd117bc9a597b89b4e9e61a" dependencies = [ "core-foundation-sys", "libc", @@ -2444,27 +2356,6 @@ dependencies = [ "syn", ] -[[package]] -name = "system-configuration" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" -dependencies = [ - "bitflags", - "core-foundation", - "system-configuration-sys", -] - -[[package]] -name = "system-configuration-sys" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "tempfile" version = "3.25.0" @@ -2613,16 +2504,6 @@ dependencies = [ "syn", ] -[[package]] -name = "tokio-native-tls" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" -dependencies = [ - "native-tls", - "tokio", -] - [[package]] name = "tokio-rustls" version = "0.26.4" @@ -2937,12 +2818,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - [[package]] name = "vergen" version = "9.1.0" @@ -3230,17 +3105,6 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" -[[package]] -name = "windows-registry" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720" -dependencies = [ - "windows-link", - "windows-result", - "windows-strings", -] - [[package]] name = "windows-result" version = "0.4.1" diff --git a/Cargo.toml b/Cargo.toml index 9cd0c59..12daa76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ clap = { version = "4.5.54", features = ["derive", "env"] } tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] } fuser = { version = "0.16.0", features = ["libfuse"] } libc = "0.2" -mesa-dev = "1.11.0" +mesa-dev = "1.15.0" num-traits = "0.2" http = "1" reqwest = { version = "0.12", default-features = false } diff --git a/src/fs/mescloud/org.rs b/src/fs/mescloud/org.rs index f77aeb4..1f3b8b5 100644 --- a/src/fs/mescloud/org.rs +++ b/src/fs/mescloud/org.rs @@ -304,6 +304,8 @@ impl OrgFs { repo_name.to_owned(), default_branch.to_owned(), self.composite.icache.fs_owner(), + // TODO(markovejnovic): Unnecessary clone. Refactoring for clearer ownership semantics + // would be ideal. self.cache_config.clone(), ) .await; diff --git a/src/fs/mescloud/repo.rs b/src/fs/mescloud/repo.rs index 2333cac..11b334a 100644 --- a/src/fs/mescloud/repo.rs +++ b/src/fs/mescloud/repo.rs @@ -219,7 +219,8 @@ impl RepoFs { match FileCache::new(&cache_dir, max_bytes).await { Ok(cache) => Some(cache), Err(e) => { - warn!(error = ?e, org = %org_name, repo = %repo_name, "failed to create file cache, continuing without caching"); + warn!(error = ?e, org = %org_name, repo = %repo_name, + "failed to create file cache, continuing without caching",); None } } From 07ac9ad8a9c8af16c3e2667fc5a50b51d556bacb Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Mon, 16 Feb 2026 20:10:01 -0800 Subject: [PATCH 33/33] flush file --- lib/cache/fcache.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/cache/fcache.rs b/lib/cache/fcache.rs index 8d69f1a..2e445b7 100644 --- a/lib/cache/fcache.rs +++ b/lib/cache/fcache.rs @@ -405,6 +405,7 @@ impl }; let mut new_file = self.create_file(&path).await?; new_file.write_all(&value).await?; + new_file.flush().await?; // Use entry_async to lock the bucket, then allocate version under the lock. // This ensures version monotonicity per key matches the actual mutation order.
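As an aside on the flush above (an illustrative sketch, not part of the patch
itself): tokio's buffered writers do not flush on drop, so if create_file hands
back a BufWriter -- an assumption, since its definition is not shown in this
series -- then write_all alone only fills the in-memory buffer, and a
concurrent reader could still observe a short or empty file. A minimal sketch
of the failure mode and the fix:

    use tokio::fs::File;
    use tokio::io::{AsyncWriteExt as _, BufWriter};

    // Hypothetical helper mirroring the insert path in lib/cache/fcache.rs.
    async fn write_entry(path: &std::path::Path, value: &[u8]) -> std::io::Result<()> {
        let mut file = BufWriter::new(File::create(path).await?);
        file.write_all(value).await?;
        // Without this flush, bytes still sitting in BufWriter's buffer are
        // silently lost when the writer is dropped (async Drop cannot flush).
        file.flush().await?;
        Ok(())
    }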