-
Notifications
You must be signed in to change notification settings - Fork 0
Prefetching (non-slop) #40
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,203 @@ | ||
| //! Async INode Table which supports concurrent access and modification. | ||
| use std::{ffi::OsStr, sync::Arc}; | ||
|
|
||
| use crate::fs::{FileHandle, INode, INodeType, InodeAddr, dcache::DCache}; | ||
|
|
||
| use scc; | ||
| use tokio::sync::{Notify, OnceCell, watch}; | ||
|
|
||
| /// Represents an INode that has been loaded by the kernel. | ||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] | ||
| pub struct LoadedAddr(pub InodeAddr); | ||
|
|
||
| /// Represents an open file handle that has been loaded by the kernel. | ||
| pub struct OpenFile(pub FileHandle); | ||
|
|
||
| /// A trait representing a data provider for the `AsyncFs`. This trait abstracts over the | ||
| /// underlying filesystem or storage mechanism that provides inode data to the `AsyncFs` on cache | ||
| /// misses. | ||
| pub trait FsDataProvider: Clone + Send + Sync + 'static { | ||
| type LookupFuture: Future<Output = Result<INode, std::io::Error>>; | ||
|
|
||
| async fn lookup(&self, parent: INode, name: &OsStr) -> Result<INode, std::io::Error>; | ||
| } | ||
|
|
||
| /// An asynchronous implementation of a table mapping `InodeAddr` to `InodeData`. | ||
| pub struct AsyncFs<DP: FsDataProvider> { | ||
| /// The underlying concurrent hash map storing the inode data. | ||
| inode_table: scc::HashMap<InodeAddr, INode>, | ||
|
|
||
| /// Cache for directory entries, mapping a parent `InodeAddr` | ||
| directory_cache: DCache, | ||
|
|
||
| lookups_in_flight: scc::HashMap<InodeAddr, Notify>, | ||
| inodes_in_flight: scc::HashMap<InodeAddr, Notify>, | ||
|
|
||
| /// The data provider used to fetch inode data on cache misses. | ||
| data_provider: DP, | ||
| } | ||
|
|
||
| impl<DP: FsDataProvider> AsyncFs<DP> { | ||
| /// Get the total number of inodes currently stored in the table. | ||
| pub fn inode_count(&self) -> usize { | ||
| self.inode_table.len() | ||
| } | ||
|
|
||
| /// Asynchronously look up an `Inode` by its `InodeAddr`. | ||
| /// | ||
| /// Args: | ||
| /// | ||
| /// - `parent`: The `LoadedAddr` of the parent directory containing the inode to look up. | ||
| /// - `name`: The name of the inode to look up within the parent directory. | ||
| pub async fn lookup(&self, parent: LoadedAddr, name: &OsStr) -> Result<INode, std::io::Error> { | ||
| if cfg!(debug_assertions) { | ||
| let parent_ino = self.loaded_inode(parent).await?; | ||
|
|
||
| debug_assert!( | ||
| matches!(parent_ino.itype, INodeType::Directory), | ||
| "parent inode should be a directory" | ||
| ); | ||
| } | ||
|
|
||
| if let Some(dentry) = self.directory_cache.lookup(parent, name) { | ||
| // The directory cache has an entry for this name. Let's load it from the main table | ||
| // and return it. If the entry is in the dcache, then that necessarily means the inode | ||
| // is loaded in the main table. | ||
| return self.loaded_inode(dentry.ino).await; | ||
| } | ||
|
|
||
| // The directory entry does NOT exist in the cache, so we need to perform a lookup. | ||
| let parent_ino = self.loaded_inode(parent).await?; | ||
| self.data_provider.lookup(parent_ino, name).await | ||
| } | ||
|
|
||
| /// Asynchronously get the attributes of an inode by its `InodeAddr`. | ||
| /// | ||
| /// Args: | ||
| /// - `ino`: The `LoadedAddr` of the inode whose attributes to retrieve. | ||
| /// - `fh`: An optional `FileHandle` that may be used to optimize attribute retrieval for open | ||
| /// files. | ||
| pub async fn getattr( | ||
| &mut self, | ||
| ino: LoadedAddr, | ||
| fh: Option<OpenFile>, | ||
| ) -> Result<(), std::io::Error> { | ||
| unimplemented!(); | ||
| } | ||
|
|
||
| /// Attempt to load an `INode` from the table. | ||
| /// | ||
| /// # Notes | ||
| /// | ||
| /// - This expects the inode to be loaded and available in the table. | ||
| /// - Will handle inodes that are currently in-flight by awaiting their completion, but this is | ||
| /// not expected and will fail in debug mode. | ||
| async fn loaded_inode(&self, addr: LoadedAddr) -> Result<INode, std::io::Error> { | ||
| debug_assert!( | ||
| self.inode_table.contains_sync(&addr.0) || self.inodes_in_flight.contains_sync(&addr.0) | ||
| ); | ||
|
|
||
| match self.inode_table.get_async(&addr.0).await { | ||
| Some(v) => Ok(*v), | ||
| None => { | ||
| // We didn't find the inode in the main table, so it may be in-flight. | ||
| match self.inodes_in_flight.get_async(&addr.0).await { | ||
| Some(notifier) => { | ||
| // Let's await the notifier for this inode to be loaded. Once it's | ||
| // notified, it's safe for us to re-read the main table and expect the | ||
| // inode to be present. | ||
| notifier.notified().await; | ||
|
|
||
| match self.inode_table.get_async(&addr.0).await { | ||
| Some(v) => Ok(*v), | ||
| None => { | ||
| debug_assert!( | ||
| false, | ||
| "inode should have been loaded after notification: {:?}", | ||
| addr.0 | ||
| ); | ||
|
|
||
| Err(std::io::Error::new( | ||
| std::io::ErrorKind::NotFound, | ||
| format!("inode not found after notification: {:?}", addr.0), | ||
| )) | ||
| } | ||
| } | ||
| } | ||
| None => { | ||
| // The inode was never requested for loading, which means we have a | ||
| // programming bug. | ||
| debug_assert!( | ||
| false, | ||
| "inode not found in main table or in-flight map: {:?}", | ||
| addr.0 | ||
| ); | ||
|
|
||
| Err(std::io::Error::new( | ||
| std::io::ErrorKind::NotFound, | ||
| format!( | ||
| "inode not found in main table or in-flight map: {:?}", | ||
| addr.0 | ||
| ), | ||
| )) | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| async fn inode(&self, addr: LoadedAddr) -> Result<INode, std::io::Error> { | ||
| debug_assert!( | ||
| self.inode_table.contains_sync(&addr.0) || self.inodes_in_flight.contains_sync(&addr.0) | ||
| ); | ||
|
|
||
| match self.inode_table.get_async(&addr.0).await { | ||
| Some(v) => Ok(*v), | ||
| None => { | ||
| // We didn't find the inode in the main table, so it may be in-flight. | ||
| match self.inodes_in_flight.get_async(&addr.0).await { | ||
| Some(notifier) => { | ||
| // Let's await the notifier for this inode to be loaded. Once it's | ||
| // notified, it's safe for us to re-read the main table and expect the | ||
| // inode to be present. | ||
| notifier.notified().await; | ||
|
|
||
| match self.inode_table.get_async(&addr.0).await { | ||
| Some(v) => Ok(*v), | ||
| None => { | ||
| debug_assert!( | ||
| false, | ||
| "inode should have been loaded after notification: {:?}", | ||
| addr.0 | ||
| ); | ||
|
|
||
| Err(std::io::Error::new( | ||
| std::io::ErrorKind::NotFound, | ||
| format!("inode not found after notification: {:?}", addr.0), | ||
| )) | ||
| } | ||
| } | ||
| } | ||
| None => { | ||
| // The inode was never requested for loading, which means we have a | ||
| // programming bug. | ||
| debug_assert!( | ||
| false, | ||
| "inode not found in main table or in-flight map: {:?}", | ||
| addr.0 | ||
| ); | ||
|
|
||
| Err(std::io::Error::new( | ||
| std::io::ErrorKind::NotFound, | ||
| format!( | ||
| "inode not found in main table or in-flight map: {:?}", | ||
| addr.0 | ||
| ), | ||
| )) | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,63 @@ | ||
| use std::ffi::{OsStr, OsString}; | ||
| use std::ops::Bound; | ||
|
|
||
| use scc::Guard; | ||
|
|
||
| use crate::fs::async_fs::LoadedAddr; | ||
|
|
||
| #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] | ||
| pub struct DKey { | ||
| pub parent_ino: LoadedAddr, | ||
| pub name: OsString, | ||
| } | ||
|
|
||
| #[derive(Debug, Clone, PartialEq, Eq)] | ||
| pub struct DValue { | ||
| pub ino: LoadedAddr, | ||
| pub is_dir: bool, | ||
| } | ||
|
|
||
| #[derive(Default)] | ||
| pub struct DCache { | ||
| cache: scc::TreeIndex<DKey, DValue>, | ||
| } | ||
|
|
||
| impl DCache { | ||
| pub fn new() -> Self { | ||
| Self::default() | ||
| } | ||
|
|
||
| pub fn lookup(&self, parent_ino: LoadedAddr, name: &OsStr) -> Option<DValue> { | ||
| let key = DKey { | ||
| parent_ino, | ||
| name: name.to_os_string(), | ||
| }; | ||
| self.cache.peek_with(&key, |_, v| v.clone()) | ||
| } | ||
|
|
||
| pub fn insert(&self, parent_ino: LoadedAddr, name: OsString, ino: LoadedAddr, is_dir: bool) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Prompt for Agent |
||
| // Overwrites are expected when refreshing cached entries. | ||
| let _ = self | ||
| .cache | ||
| .insert_sync(DKey { parent_ino, name }, DValue { ino, is_dir }); | ||
| } | ||
|
|
||
| pub fn readdir(&self, parent_ino: LoadedAddr) -> Vec<(OsString, DValue)> { | ||
| let lower = Bound::Included(DKey { | ||
| parent_ino, | ||
| name: OsString::new(), | ||
| }); | ||
| let upper = match parent_ino.0.checked_add(1) { | ||
| Some(next) => Bound::Excluded(DKey { | ||
| parent_ino: LoadedAddr(next), | ||
| name: OsString::new(), | ||
| }), | ||
| None => Bound::Unbounded, | ||
| }; | ||
| let guard = Guard::new(); | ||
| self.cache | ||
| .range((lower, upper), &guard) | ||
| .map(|(key, value)| (key.name.clone(), value.clone())) | ||
| .collect() | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,113 @@ | ||
| //! Useful filesystem generalizations. | ||
| pub mod async_fs; | ||
| pub mod dcache; | ||
|
|
||
| use std::time::SystemTime; | ||
|
|
||
| use bitflags::bitflags; | ||
|
|
||
| /// Type representing an inode identifier. | ||
| pub type InodeAddr = u64; | ||
|
|
||
| /// Type representing a file handle. | ||
| pub type FileHandle = u64; | ||
|
|
||
| bitflags! { | ||
| /// Permission bits for an inode, similar to Unix file permissions. | ||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] | ||
| pub struct InodePerms: u16 { | ||
| // Other | ||
| const OTHER_EXECUTE = 1 << 0; | ||
| const OTHER_WRITE = 1 << 1; | ||
| const OTHER_READ = 1 << 2; | ||
|
|
||
| // Group | ||
| const GROUP_EXECUTE = 1 << 3; | ||
| const GROUP_WRITE = 1 << 4; | ||
| const GROUP_READ = 1 << 5; | ||
|
|
||
| // Owner | ||
| const OWNER_EXECUTE = 1 << 6; | ||
| const OWNER_WRITE = 1 << 7; | ||
| const OWNER_READ = 1 << 8; | ||
|
|
||
| // Special bits | ||
| const STICKY = 1 << 9; | ||
| const SETGID = 1 << 10; | ||
| const SETUID = 1 << 11; | ||
|
|
||
| const OTHER_RWX = Self::OTHER_READ.bits() | ||
| | Self::OTHER_WRITE.bits() | ||
| | Self::OTHER_EXECUTE.bits(); | ||
| const GROUP_RWX = Self::GROUP_READ.bits() | ||
| | Self::GROUP_WRITE.bits() | ||
| | Self::GROUP_EXECUTE.bits(); | ||
| const OWNER_RWX = Self::OWNER_READ.bits() | ||
| | Self::OWNER_WRITE.bits() | ||
| | Self::OWNER_EXECUTE.bits(); | ||
| } | ||
| } | ||
|
|
||
| bitflags! { | ||
| /// Flags for opening a file, similar to Unix open(2) flags. | ||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] | ||
| pub struct OpenFlags: i32 { | ||
| // Access modes (mutually exclusive) | ||
| const RDONLY = libc::O_RDONLY; | ||
| const WRONLY = libc::O_WRONLY; | ||
| const RDWR = libc::O_RDWR; | ||
|
|
||
| // Creation/status flags | ||
| const APPEND = libc::O_APPEND; | ||
| const TRUNC = libc::O_TRUNC; | ||
| const CREAT = libc::O_CREAT; | ||
| const EXCL = libc::O_EXCL; | ||
|
|
||
| // Behavior flags | ||
| const NONBLOCK = libc::O_NONBLOCK; | ||
| const SYNC = libc::O_SYNC; | ||
| const DSYNC = libc::O_DSYNC; | ||
| const NOFOLLOW = libc::O_NOFOLLOW; | ||
| const CLOEXEC = libc::O_CLOEXEC; | ||
| const DIRECTORY = libc::O_DIRECTORY; | ||
|
|
||
| #[cfg(target_os = "linux")] | ||
| const NOATIME = libc::O_NOATIME; | ||
| } | ||
| } | ||
|
|
||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] | ||
| pub enum INodeType { | ||
| File, | ||
| Directory, | ||
| } | ||
|
|
||
| /// Representation of an inode. | ||
| #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] | ||
| pub struct INode { | ||
| /// The address of this inode, which serves as its unique identifier. | ||
| pub addr: InodeAddr, | ||
| /// The permissions associated with this inode, represented as a bitfield. | ||
| pub permissions: InodePerms, | ||
| /// The user ID of the owner of this inode. | ||
| pub uid: u32, | ||
| /// The group ID of the owner of this inode. | ||
| pub gid: u32, | ||
| /// The time this inode was created at. | ||
| pub create_time: SystemTime, | ||
| /// The time this inode was last modified at. | ||
| pub last_modified_at: SystemTime, | ||
| /// The parent inode address, if any. This is `None` for the root inode. | ||
| pub parent: Option<InodeAddr>, | ||
| /// The size of the file represented by this inode, in bytes. | ||
| pub size: usize, | ||
| /// Additional information about the type of this inode (e.g., file vs directory). | ||
| pub itype: INodeType, | ||
| } | ||
|
|
||
| impl INode { | ||
| /// Check if this inode is the root inode (i.e., has no parent). | ||
| pub fn is_root(&self) -> bool { | ||
| self.parent.is_none() | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,4 +2,6 @@ | |
|
|
||
| /// Caching primitives for git-fs. | ||
| pub mod cache; | ||
| pub mod fs; | ||
| pub mod io; | ||
| pub mod tokex; | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FsDataProvideruses theFuturetrait in its associated type, but this module never bringsstd::future::Futureinto scope. As written the file doesn’t compile (cannot find type �Future� in this scope). Please importstd::future::Futurebefore defining the trait.Prompt for Agent