move git store to its own file

This commit is contained in:
Brian Picciano 2023-05-11 11:54:25 +02:00
parent a0bdf610d9
commit 796b90e91d
2 changed files with 383 additions and 391 deletions

View File

@ -1,6 +1,8 @@
use crate::origin;
use std::error::Error;
pub mod git;
#[derive(Clone, Copy)]
pub struct Limits {
// TODO storage limits
@ -60,394 +62,3 @@ pub trait Store {
fn get(&self, descr: origin::Descr) -> Result<Self::Origin<'_>, GetError>;
fn all_descrs(&self) -> AllDescrsResult<Self::AllDescrsIter<'_>>;
}
pub mod git {
use crate::origin;
use crate::origin::store;
use std::error::Error;
use std::path::{Path, PathBuf};
use std::{collections, fs, io, sync};
/// A handle to a single synced git origin: a thread-safe gix repository plus
/// the object id of the tree that file reads resolve paths against.
pub struct Origin {
descr: origin::Descr,
repo: gix::ThreadSafeRepository,
// tree of the branch's commit at sync time; see get_origin for how it's resolved
tree_object_id: gix::ObjectId,
}
// The trait is implemented on Arc<Origin> rather than Origin itself so the
// Store can hand out cheaply clonable handles from its internal cache.
impl origin::Origin for sync::Arc<Origin> {
fn descr(&self) -> &origin::Descr {
&self.descr
}
/// Looks up `path` in the origin's tree and writes the blob's bytes into
/// `into`. A leading "/" on `path` is stripped first. Returns FileNotFound
/// if the path isn't present in the tree; any gix or write failure is
/// surfaced as Unexpected.
fn read_file_into(
&self,
path: &str,
into: &mut dyn std::io::Write,
) -> Result<(), origin::ReadFileIntoError> {
// normalize away a leading "/" so absolute-looking paths still resolve
let mut clean_path = Path::new(path);
clean_path = clean_path.strip_prefix("/").unwrap_or(clean_path);
// ThreadSafeRepository must be converted to a thread-local Repository for use
let repo = self.repo.to_thread_local();
let file_object = repo
.find_object(self.tree_object_id)
.map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?
.peel_to_tree()
.map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?
.lookup_entry_by_path(clean_path)
.map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?
.ok_or(origin::ReadFileIntoError::FileNotFound)?
.object()
.map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?;
into.write_all(file_object.data.as_ref())
.map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?;
Ok(())
}
}
// Internal error type for Store::get_origin; callers map these variants into
// store::SyncError or store::GetError as appropriate.
#[derive(thiserror::Error, Debug)]
enum GetOriginError {
/// The descr's branch does not exist in the repository.
#[error("invalid branch name")]
InvalidBranchName,
/// Any other gix failure, passed through transparently.
#[error(transparent)]
Unexpected(Box<dyn Error>),
}
/// git::Store implements the Store trait for any Descr::Git based Origins. If any non-git
/// Descrs are used then this implementation will panic.
pub struct Store {
// root directory; each synced origin gets its own bare repo dir under here
dir_path: PathBuf,
// to prevent against syncing the same origin more than once at a time, but still allowing
// more than one origin to be syncing at a time
sync_guard: sync::Mutex<collections::HashMap<origin::Descr, ()>>,
// cache of already-constructed Origin handles, keyed by Descr
origins: sync::RwLock<collections::HashMap<origin::Descr, sync::Arc<Origin>>>,
}
impl Store {
/// Creates a new git Store rooted at dir_path, creating the directory (and
/// any missing parents) if it doesn't already exist.
pub fn new(dir_path: PathBuf) -> io::Result<Store> {
fs::create_dir_all(&dir_path)?;
Ok(Store {
dir_path,
sync_guard: sync::Mutex::new(collections::HashMap::new()),
origins: sync::RwLock::new(collections::HashMap::new()),
})
}
// directory holding the bare repo for this descr
fn repo_path(&self, descr: &origin::Descr) -> PathBuf {
self.dir_path.join(descr.id())
}
// path of the descr.json written next to a synced repo; all_descrs reads it
// back to recover the original Descr
fn descr_file_path(&self, descr_id: &str) -> PathBuf {
self.dir_path.join(descr_id).join("descr.json")
}
// name of the remote-tracking ref for the given branch name
fn branch_ref(&self, branch_name: &str) -> String {
format!("origin/{branch_name}")
}
// Resolves descr's remote-tracking branch to its commit's tree and wraps the
// repo + tree id into an Origin handle. Panics if descr is not Descr::Git
// (the irrefutable-looking let below only matches the Git variant).
fn get_origin(
&self,
repo: gix::Repository,
descr: origin::Descr,
) -> Result<sync::Arc<Origin>, GetOriginError> {
let origin::Descr::Git {
ref branch_name, ..
} = descr;
// branch ref -> commit object id
let commit_object_id = repo
.try_find_reference(&self.branch_ref(branch_name))
.map_err(|e| GetOriginError::Unexpected(Box::from(e)))?
.ok_or(GetOriginError::InvalidBranchName)?
.peel_to_id_in_place()
.map_err(|e| GetOriginError::Unexpected(Box::from(e)))?
.detach();
// commit -> tree object id; file reads resolve against this tree
let tree_object_id = repo
.find_object(commit_object_id)
.map_err(|e| GetOriginError::Unexpected(Box::from(e)))?
.try_to_commit_ref()
.map_err(|e| GetOriginError::Unexpected(Box::from(e)))?
.tree();
return Ok(sync::Arc::from(Origin {
descr,
repo: repo.into(),
tree_object_id,
}));
}
// Performs the actual clone-or-fetch for descr and returns the opened repo.
// Callers are expected to already hold the sync_guard entry for this descr.
fn sync_inner(
&self,
descr: &origin::Descr,
_limits: store::Limits,
) -> Result<gix::Repository, store::SyncError> {
use gix::clone::Error as gixCloneErr;
use gix::progress::Discard;
let should_interrupt = &core::sync::atomic::AtomicBool::new(false);
let repo_path = &self.repo_path(&descr);
// if the path doesn't exist then use the gix clone feature to clone it into the
// directory.
if fs::read_dir(repo_path).is_err() {
fs::create_dir_all(repo_path)
.map_err(|e| store::SyncError::Unexpected(Box::from(e)))?;
let origin::Descr::Git {
ref url,
ref branch_name,
} = descr;
let (repo, _) = gix::prepare_clone_bare(url.clone(), repo_path)
.map_err(|e| match e {
gixCloneErr::Init(gix::init::Error::InvalidBranchName { .. }) => {
store::SyncError::InvalidBranchName
}
gixCloneErr::UrlParse(_) | gixCloneErr::CanonicalizeUrl { .. } => {
store::SyncError::InvalidURL
}
_ => store::SyncError::Unexpected(Box::from(e)),
})?
.fetch_only(Discard, should_interrupt)
// NOTE(review): this maps *every* fetch failure (including network
// errors) to InvalidURL — consider preserving the underlying cause
.map_err(|_| store::SyncError::InvalidURL)?;
// Check to make sure the branch name exists
// TODO if this fails we should delete repo_path
repo.try_find_reference(&self.branch_ref(branch_name))
.map_err(|e| store::SyncError::Unexpected(Box::from(e)))?
.ok_or(store::SyncError::InvalidBranchName)?;
// Add the descr to the repo directory, so we can know the actual descr later
// TODO if this fails we should delete repo_path
let descr_file = fs::File::create(self.descr_file_path(descr.id().as_ref()))
.map_err(|e| store::SyncError::Unexpected(Box::from(e)))?;
serde_json::to_writer(descr_file, &descr)
.map_err(|e| store::SyncError::Unexpected(Box::from(e)))?;
return Ok(repo);
}
// the repo already exists on disk; open it and fetch from its default remote
let direction = gix::remote::Direction::Fetch;
let repo =
gix::open(repo_path).map_err(|e| store::SyncError::Unexpected(Box::from(e)))?;
let remote = repo
.find_default_remote(direction)
.ok_or_else(|| store::SyncError::Unexpected(Box::from("no default configured")))?
.map_err(|e| store::SyncError::Unexpected(Box::from(e)))?;
remote
.connect(direction)
.map_err(|e| store::SyncError::Unexpected(Box::from(e)))?
.prepare_fetch(Discard, Default::default())
.map_err(|e| store::SyncError::Unexpected(Box::from(e)))?
.receive(Discard, should_interrupt)
.map_err(|e| store::SyncError::Unexpected(Box::from(e)))?;
Ok(repo)
}
}
impl super::Store for Store {
type Origin<'store> = sync::Arc<Origin>
where Self: 'store;
type AllDescrsIter<'store> = Box<dyn Iterator<Item = store::AllDescrsResult<origin::Descr>> + 'store>
where Self: 'store;
/// Clones or fetches the origin described by descr, then constructs and
/// caches an Origin handle for it. Returns AlreadyInProgress if another
/// sync of the same descr is currently running.
fn sync(
&self,
descr: origin::Descr,
limits: store::Limits,
) -> Result<(), store::SyncError> {
// attempt to lock this descr for syncing, doing so within a new scope so the mutex
// isn't actually being held for the whole method duration.
let is_already_syncing = {
self.sync_guard
.lock()
.unwrap()
.insert(descr.clone(), ())
.is_some()
};
if is_already_syncing {
return Err(store::SyncError::AlreadyInProgress);
}
let res = self.sync_inner(&descr, limits);
// release the per-descr sync lock before inspecting the result
self.sync_guard.lock().unwrap().remove(&descr);
let repo = match res {
Ok(repo) => repo,
Err(e) => return Err(e),
};
// repo is synced at this point and the per-descr sync lock has been
// released; all that's left is to construct the Origin and cache it.
//
// TODO cached origins are never evicted, so this map grows without bound —
// a bit of a memory leak, but by the time that matters this should all be
// backed by something which isn't local storage anyway.
let origin = self.get_origin(repo, descr.clone()).map_err(|e| match e {
GetOriginError::InvalidBranchName => store::SyncError::InvalidBranchName,
GetOriginError::Unexpected(e) => store::SyncError::Unexpected(e),
})?;
let mut origins = self.origins.write().unwrap();
(*origins).insert(descr, origin);
Ok(())
}
/// Returns the Origin for descr, from cache when possible, otherwise by
/// opening the on-disk repo. Returns NotFound if descr was never synced.
fn get(&self, descr: origin::Descr) -> Result<Self::Origin<'_>, store::GetError> {
{
// fast path: origin already constructed and cached
let origins = self.origins.read().unwrap();
if let Some(origin) = origins.get(&descr) {
return Ok(origin.clone());
}
}
let repo_path = self.repo_path(&descr);
// a missing repo directory means this descr was never successfully synced
fs::read_dir(&repo_path).map_err(|e| match e.kind() {
io::ErrorKind::NotFound => store::GetError::NotFound,
_ => store::GetError::Unexpected(Box::from(e)),
})?;
let repo =
gix::open(&repo_path).map_err(|e| store::GetError::Unexpected(Box::from(e)))?;
let origin = self.get_origin(repo, descr.clone()).map_err(|e| match e {
// it's not expected that the branch name is invalid at this point, it must have
// existed for sync to have been successful.
GetOriginError::InvalidBranchName => store::GetError::Unexpected(Box::from(e)),
GetOriginError::Unexpected(e) => store::GetError::Unexpected(e),
})?;
let mut origins = self.origins.write().unwrap();
(*origins).insert(descr, origin.clone());
Ok(origin)
}
/// Iterates the per-origin directories under dir_path, yielding the Descr
/// deserialized from each directory's descr.json (or an error per entry).
fn all_descrs(&self) -> store::AllDescrsResult<Self::AllDescrsIter<'_>> {
Ok(Box::from(
fs::read_dir(&self.dir_path)
.map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))?
.map(
|dir_entry_res: io::Result<fs::DirEntry>| -> store::AllDescrsResult<origin::Descr> {
let descr_id: String = dir_entry_res
.map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))?
.file_name()
.to_str()
.ok_or_else(|| {
store::AllDescrsError::Unexpected(Box::from(
"couldn't convert os string to &str",
))
})?
.into();
let descr_file_path = self.descr_file_path(descr_id.as_ref());
// TODO it's possible that opening the file will fail if syncing is
// still ongoing, as writing the descr file is the last step after
// initial sync has succeeded.
let descr_file = fs::File::open(descr_file_path)
.map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))?;
let descr = serde_json::from_reader(descr_file)
.map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))?;
Ok(descr)
},
),
))
}
}
#[cfg(test)]
mod tests {
    use crate::origin::store;
    use crate::origin::store::Store;
    use crate::origin::{self, Origin};
    use std::sync;
    use tempdir::TempDir;

    /// End-to-end exercise of the git store, using this repository itself as
    /// the remote: sync, re-sync, get of a never-synced descr, file reads with
    /// and without a leading '/', a missing-file read, and descr enumeration.
    #[test]
    fn basic() {
        let tmp_dir = TempDir::new("origin_store_git").unwrap();
        // file:// URL pointing at the repo this test runs inside of
        let curr_dir = format!("file://{}", std::env::current_dir().unwrap().display());
        let descr = origin::Descr::Git {
            url: curr_dir.clone(),
            branch_name: String::from("master"),
        };
        // same URL, but never synced into the store
        let other_descr = origin::Descr::Git {
            url: curr_dir.clone(),
            branch_name: String::from("some_other_branch"),
        };
        let limits = store::Limits {};
        let store = super::Store::new(tmp_dir.path().to_path_buf()).expect("store created");
        store
            .sync(descr.clone(), limits)
            .expect("sync should succeed");
        // second sync exercises the already-cloned (fetch) path
        store
            .sync(descr.clone(), limits)
            .expect("second sync should succeed");
        // a descr which was never synced must report NotFound
        assert!(matches!(
            store.get(other_descr),
            Err::<sync::Arc<super::Origin>, store::GetError>(store::GetError::NotFound),
        ));
        let origin = store.get(descr.clone()).expect("origin retrieved");
        assert_eq!(&descr, origin.descr());
        let assert_write = |path: &str| {
            let mut into: Vec<u8> = vec![];
            origin
                .read_file_into(path, &mut into)
                .expect("write should succeed");
            assert!(!into.is_empty());
        };
        // paths resolve both with and without a leading slash
        assert_write("src/lib.rs");
        assert_write("/src/lib.rs");
        // File doesn't exist; nothing should have been written into the buffer
        let mut into: Vec<u8> = vec![];
        assert!(matches!(
            origin.read_file_into("DNE", &mut into),
            Err::<(), origin::ReadFileIntoError>(origin::ReadFileIntoError::FileNotFound),
        ));
        assert_eq!(into.len(), 0);
        let descrs = store
            .all_descrs()
            .expect("all_descrs called")
            .collect::<Vec<Result<origin::Descr, store::AllDescrsError>>>();
        assert_eq!(1, descrs.len());
        assert_eq!(&descr, descrs[0].as_ref().unwrap());
    }
}
}

381
src/origin/store/git.rs Normal file
View File

@ -0,0 +1,381 @@
use crate::origin;
use crate::origin::store;
use std::error::Error;
use std::path::{Path, PathBuf};
use std::{collections, fs, io, sync};
/// A handle to a single synced git origin: a thread-safe gix repository plus
/// the object id of the tree that file reads resolve paths against.
pub struct Origin {
descr: origin::Descr,
repo: gix::ThreadSafeRepository,
// tree of the branch's commit at sync time; see get_origin for how it's resolved
tree_object_id: gix::ObjectId,
}
// The trait is implemented on Arc<Origin> rather than Origin itself so the
// Store can hand out cheaply clonable handles from its internal cache.
impl origin::Origin for sync::Arc<Origin> {
fn descr(&self) -> &origin::Descr {
&self.descr
}
/// Looks up `path` in the origin's tree and writes the blob's bytes into
/// `into`. A leading "/" on `path` is stripped first. Returns FileNotFound
/// if the path isn't present in the tree; any gix or write failure is
/// surfaced as Unexpected.
fn read_file_into(
&self,
path: &str,
into: &mut dyn std::io::Write,
) -> Result<(), origin::ReadFileIntoError> {
// normalize away a leading "/" so absolute-looking paths still resolve
let mut clean_path = Path::new(path);
clean_path = clean_path.strip_prefix("/").unwrap_or(clean_path);
// ThreadSafeRepository must be converted to a thread-local Repository for use
let repo = self.repo.to_thread_local();
let file_object = repo
.find_object(self.tree_object_id)
.map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?
.peel_to_tree()
.map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?
.lookup_entry_by_path(clean_path)
.map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?
.ok_or(origin::ReadFileIntoError::FileNotFound)?
.object()
.map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?;
into.write_all(file_object.data.as_ref())
.map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?;
Ok(())
}
}
// Internal error type for Store::get_origin; callers map these variants into
// store::SyncError or store::GetError as appropriate.
#[derive(thiserror::Error, Debug)]
enum GetOriginError {
/// The descr's branch does not exist in the repository.
#[error("invalid branch name")]
InvalidBranchName,
/// Any other gix failure, passed through transparently.
#[error(transparent)]
Unexpected(Box<dyn Error>),
}
/// git::Store implements the Store trait for any Descr::Git based Origins. If any non-git
/// Descrs are used then this implementation will panic.
pub struct Store {
// root directory; each synced origin gets its own bare repo dir under here
dir_path: PathBuf,
// to prevent against syncing the same origin more than once at a time, but still allowing
// more than one origin to be syncing at a time
sync_guard: sync::Mutex<collections::HashMap<origin::Descr, ()>>,
// cache of already-constructed Origin handles, keyed by Descr
origins: sync::RwLock<collections::HashMap<origin::Descr, sync::Arc<Origin>>>,
}
impl Store {
/// Creates a new git Store rooted at dir_path, creating the directory (and
/// any missing parents) if it doesn't already exist.
pub fn new(dir_path: PathBuf) -> io::Result<Store> {
fs::create_dir_all(&dir_path)?;
Ok(Store {
dir_path,
sync_guard: sync::Mutex::new(collections::HashMap::new()),
origins: sync::RwLock::new(collections::HashMap::new()),
})
}
// directory holding the bare repo for this descr
fn repo_path(&self, descr: &origin::Descr) -> PathBuf {
self.dir_path.join(descr.id())
}
// path of the descr.json written next to a synced repo; all_descrs reads it
// back to recover the original Descr
fn descr_file_path(&self, descr_id: &str) -> PathBuf {
self.dir_path.join(descr_id).join("descr.json")
}
// name of the remote-tracking ref for the given branch name
fn branch_ref(&self, branch_name: &str) -> String {
format!("origin/{branch_name}")
}
// Resolves descr's remote-tracking branch to its commit's tree and wraps the
// repo + tree id into an Origin handle. Panics if descr is not Descr::Git
// (the irrefutable-looking let below only matches the Git variant).
fn get_origin(
&self,
repo: gix::Repository,
descr: origin::Descr,
) -> Result<sync::Arc<Origin>, GetOriginError> {
let origin::Descr::Git {
ref branch_name, ..
} = descr;
// branch ref -> commit object id
let commit_object_id = repo
.try_find_reference(&self.branch_ref(branch_name))
.map_err(|e| GetOriginError::Unexpected(Box::from(e)))?
.ok_or(GetOriginError::InvalidBranchName)?
.peel_to_id_in_place()
.map_err(|e| GetOriginError::Unexpected(Box::from(e)))?
.detach();
// commit -> tree object id; file reads resolve against this tree
let tree_object_id = repo
.find_object(commit_object_id)
.map_err(|e| GetOriginError::Unexpected(Box::from(e)))?
.try_to_commit_ref()
.map_err(|e| GetOriginError::Unexpected(Box::from(e)))?
.tree();
return Ok(sync::Arc::from(Origin {
descr,
repo: repo.into(),
tree_object_id,
}));
}
// Performs the actual clone-or-fetch for descr and returns the opened repo.
// Callers are expected to already hold the sync_guard entry for this descr.
fn sync_inner(
&self,
descr: &origin::Descr,
_limits: store::Limits,
) -> Result<gix::Repository, store::SyncError> {
use gix::clone::Error as gixCloneErr;
use gix::progress::Discard;
let should_interrupt = &core::sync::atomic::AtomicBool::new(false);
let repo_path = &self.repo_path(&descr);
// if the path doesn't exist then use the gix clone feature to clone it into the
// directory.
if fs::read_dir(repo_path).is_err() {
fs::create_dir_all(repo_path)
.map_err(|e| store::SyncError::Unexpected(Box::from(e)))?;
let origin::Descr::Git {
ref url,
ref branch_name,
} = descr;
let (repo, _) = gix::prepare_clone_bare(url.clone(), repo_path)
.map_err(|e| match e {
gixCloneErr::Init(gix::init::Error::InvalidBranchName { .. }) => {
store::SyncError::InvalidBranchName
}
gixCloneErr::UrlParse(_) | gixCloneErr::CanonicalizeUrl { .. } => {
store::SyncError::InvalidURL
}
_ => store::SyncError::Unexpected(Box::from(e)),
})?
.fetch_only(Discard, should_interrupt)
// NOTE(review): this maps *every* fetch failure (including network
// errors) to InvalidURL — consider preserving the underlying cause
.map_err(|_| store::SyncError::InvalidURL)?;
// Check to make sure the branch name exists
// TODO if this fails we should delete repo_path
repo.try_find_reference(&self.branch_ref(branch_name))
.map_err(|e| store::SyncError::Unexpected(Box::from(e)))?
.ok_or(store::SyncError::InvalidBranchName)?;
// Add the descr to the repo directory, so we can know the actual descr later
// TODO if this fails we should delete repo_path
let descr_file = fs::File::create(self.descr_file_path(descr.id().as_ref()))
.map_err(|e| store::SyncError::Unexpected(Box::from(e)))?;
serde_json::to_writer(descr_file, &descr)
.map_err(|e| store::SyncError::Unexpected(Box::from(e)))?;
return Ok(repo);
}
// the repo already exists on disk; open it and fetch from its default remote
let direction = gix::remote::Direction::Fetch;
let repo = gix::open(repo_path).map_err(|e| store::SyncError::Unexpected(Box::from(e)))?;
let remote = repo
.find_default_remote(direction)
.ok_or_else(|| store::SyncError::Unexpected(Box::from("no default configured")))?
.map_err(|e| store::SyncError::Unexpected(Box::from(e)))?;
remote
.connect(direction)
.map_err(|e| store::SyncError::Unexpected(Box::from(e)))?
.prepare_fetch(Discard, Default::default())
.map_err(|e| store::SyncError::Unexpected(Box::from(e)))?
.receive(Discard, should_interrupt)
.map_err(|e| store::SyncError::Unexpected(Box::from(e)))?;
Ok(repo)
}
}
impl super::Store for Store {
type Origin<'store> = sync::Arc<Origin>
where Self: 'store;
type AllDescrsIter<'store> = Box<dyn Iterator<Item = store::AllDescrsResult<origin::Descr>> + 'store>
where Self: 'store;
/// Clones or fetches the origin described by descr, then constructs and
/// caches an Origin handle for it. Returns AlreadyInProgress if another
/// sync of the same descr is currently running.
fn sync(&self, descr: origin::Descr, limits: store::Limits) -> Result<(), store::SyncError> {
// attempt to lock this descr for syncing, doing so within a new scope so the mutex
// isn't actually being held for the whole method duration.
let is_already_syncing = {
self.sync_guard
.lock()
.unwrap()
.insert(descr.clone(), ())
.is_some()
};
if is_already_syncing {
return Err(store::SyncError::AlreadyInProgress);
}
let res = self.sync_inner(&descr, limits);
// release the per-descr sync lock before inspecting the result
self.sync_guard.lock().unwrap().remove(&descr);
let repo = match res {
Ok(repo) => repo,
Err(e) => return Err(e),
};
// repo is synced at this point and the per-descr sync lock has been
// released; all that's left is to construct the Origin and cache it.
//
// TODO cached origins are never evicted, so this map grows without bound —
// a bit of a memory leak, but by the time that matters this should all be
// backed by something which isn't local storage anyway.
let origin = self.get_origin(repo, descr.clone()).map_err(|e| match e {
GetOriginError::InvalidBranchName => store::SyncError::InvalidBranchName,
GetOriginError::Unexpected(e) => store::SyncError::Unexpected(e),
})?;
let mut origins = self.origins.write().unwrap();
(*origins).insert(descr, origin);
Ok(())
}
/// Returns the Origin for descr, from cache when possible, otherwise by
/// opening the on-disk repo. Returns NotFound if descr was never synced.
fn get(&self, descr: origin::Descr) -> Result<Self::Origin<'_>, store::GetError> {
{
// fast path: origin already constructed and cached
let origins = self.origins.read().unwrap();
if let Some(origin) = origins.get(&descr) {
return Ok(origin.clone());
}
}
let repo_path = self.repo_path(&descr);
// a missing repo directory means this descr was never successfully synced
fs::read_dir(&repo_path).map_err(|e| match e.kind() {
io::ErrorKind::NotFound => store::GetError::NotFound,
_ => store::GetError::Unexpected(Box::from(e)),
})?;
let repo = gix::open(&repo_path).map_err(|e| store::GetError::Unexpected(Box::from(e)))?;
let origin = self.get_origin(repo, descr.clone()).map_err(|e| match e {
// it's not expected that the branch name is invalid at this point, it must have
// existed for sync to have been successful.
GetOriginError::InvalidBranchName => store::GetError::Unexpected(Box::from(e)),
GetOriginError::Unexpected(e) => store::GetError::Unexpected(e),
})?;
let mut origins = self.origins.write().unwrap();
(*origins).insert(descr, origin.clone());
Ok(origin)
}
/// Iterates the per-origin directories under dir_path, yielding the Descr
/// deserialized from each directory's descr.json (or an error per entry).
fn all_descrs(&self) -> store::AllDescrsResult<Self::AllDescrsIter<'_>> {
Ok(Box::from(
fs::read_dir(&self.dir_path)
.map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))?
.map(
|dir_entry_res: io::Result<fs::DirEntry>| -> store::AllDescrsResult<origin::Descr> {
let descr_id: String = dir_entry_res
.map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))?
.file_name()
.to_str()
.ok_or_else(|| {
store::AllDescrsError::Unexpected(Box::from(
"couldn't convert os string to &str",
))
})?
.into();
let descr_file_path = self.descr_file_path(descr_id.as_ref());
// TODO it's possible that opening the file will fail if syncing is
// still ongoing, as writing the descr file is the last step after
// initial sync has succeeded.
let descr_file = fs::File::open(descr_file_path)
.map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))?;
let descr = serde_json::from_reader(descr_file)
.map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))?;
Ok(descr)
},
),
))
}
}
#[cfg(test)]
mod tests {
    use crate::origin::store;
    use crate::origin::store::Store;
    use crate::origin::{self, Origin};
    use std::sync;
    use tempdir::TempDir;

    /// End-to-end exercise of the git store, using this repository itself as
    /// the remote: sync, re-sync, get of a never-synced descr, file reads with
    /// and without a leading '/', a missing-file read, and descr enumeration.
    #[test]
    fn basic() {
        let tmp_dir = TempDir::new("origin_store_git").unwrap();
        // file:// URL pointing at the repo this test runs inside of
        let curr_dir = format!("file://{}", std::env::current_dir().unwrap().display());
        let descr = origin::Descr::Git {
            url: curr_dir.clone(),
            branch_name: String::from("master"),
        };
        // same URL, but never synced into the store
        let other_descr = origin::Descr::Git {
            url: curr_dir.clone(),
            branch_name: String::from("some_other_branch"),
        };
        let limits = store::Limits {};
        let store = super::Store::new(tmp_dir.path().to_path_buf()).expect("store created");
        store
            .sync(descr.clone(), limits)
            .expect("sync should succeed");
        // second sync exercises the already-cloned (fetch) path
        store
            .sync(descr.clone(), limits)
            .expect("second sync should succeed");
        // a descr which was never synced must report NotFound
        assert!(matches!(
            store.get(other_descr),
            Err::<sync::Arc<super::Origin>, store::GetError>(store::GetError::NotFound),
        ));
        let origin = store.get(descr.clone()).expect("origin retrieved");
        assert_eq!(&descr, origin.descr());
        let assert_write = |path: &str| {
            let mut into: Vec<u8> = vec![];
            origin
                .read_file_into(path, &mut into)
                .expect("write should succeed");
            assert!(!into.is_empty());
        };
        // paths resolve both with and without a leading slash
        assert_write("src/lib.rs");
        assert_write("/src/lib.rs");
        // File doesn't exist; nothing should have been written into the buffer
        let mut into: Vec<u8> = vec![];
        assert!(matches!(
            origin.read_file_into("DNE", &mut into),
            Err::<(), origin::ReadFileIntoError>(origin::ReadFileIntoError::FileNotFound),
        ));
        assert_eq!(into.len(), 0);
        let descrs = store
            .all_descrs()
            .expect("all_descrs called")
            .collect::<Vec<Result<origin::Descr, store::AllDescrsError>>>();
        assert_eq!(1, descrs.len());
        assert_eq!(&descr, descrs[0].as_ref().unwrap());
    }
}