// domani/src/origin/store/git.rs
use crate::error::unexpected::{self, Intoable, Mappable};
use crate::origin::{self, store};
use std::path::{Path, PathBuf};
use std::{collections, fs, io, sync};
/// A single git-backed origin: a fixed tree within a cloned repository,
/// identified by the Descr it was synced from.
#[derive(Clone)]
struct Origin {
    // The descriptor this Origin was constructed for.
    descr: origin::Descr,

    // Arc so that cloned Origins share the same thread-safe repository handle;
    // each use converts it to a thread-local gix::Repository.
    repo: sync::Arc<gix::ThreadSafeRepository>,

    // Object id of the tree to read files from (the tree of the synced
    // branch's HEAD commit — see Store::get_origin).
    tree_object_id: gix::ObjectId,
}
2023-06-17 14:04:26 +00:00
impl origin::Origin for Origin {
2023-05-11 09:54:25 +00:00
fn descr(&self) -> &origin::Descr {
&self.descr
}
fn read_file_into(
&self,
path: &str,
into: &mut dyn std::io::Write,
) -> Result<(), origin::ReadFileIntoError> {
let mut clean_path = Path::new(path);
clean_path = clean_path.strip_prefix("/").unwrap_or(clean_path);
let repo = self.repo.to_thread_local();
let file_object = repo
.find_object(self.tree_object_id)
.map_unexpected_while(|| format!("finding tree object {}", self.tree_object_id))?
2023-05-11 09:54:25 +00:00
.peel_to_tree()
.map_unexpected_while(|| format!("peeling tree object {}", self.tree_object_id))?
2023-05-11 09:54:25 +00:00
.lookup_entry_by_path(clean_path)
.map_unexpected_while(|| {
format!(
"looking up {} in tree object {}",
clean_path.display(),
self.tree_object_id
)
})?
2023-05-11 09:54:25 +00:00
.ok_or(origin::ReadFileIntoError::FileNotFound)?
.object()
.or_unexpected()?;
2023-05-11 09:54:25 +00:00
into.write_all(file_object.data.as_ref())
.or_unexpected_while("copying out file")?;
2023-05-11 09:54:25 +00:00
Ok(())
}
}
/// Errors which can be returned from Store::get_origin.
#[derive(thiserror::Error, Debug)]
enum GetOriginError {
    /// The branch named in the Descr has no ref in the cloned repository.
    #[error("invalid branch name")]
    InvalidBranchName,

    /// Any other, unexpected error.
    #[error(transparent)]
    Unexpected(#[from] unexpected::Error),
}
/// git::Store implements the Store trait for any Descr::Git based Origins. If any non-git
/// Descrs are used then this implementation will panic.
struct Store {
    // Root directory; each synced origin gets its own sub-directory under it,
    // named by the descr's id.
    dir_path: PathBuf,

    // to prevent against syncing the same origin more than once at a time, but still allowing
    // more than one origin to be syncing at a time
    sync_guard: sync::Mutex<collections::HashMap<origin::Descr, ()>>,

    // Cache of already-constructed Origins, keyed by their Descr.
    origins: sync::RwLock<collections::HashMap<origin::Descr, sync::Arc<Origin>>>,
}
pub fn new(dir_path: PathBuf) -> io::Result<sync::Arc<dyn super::Store>> {
fs::create_dir_all(&dir_path)?;
2023-05-15 19:18:33 +00:00
Ok(sync::Arc::new(Store {
dir_path,
sync_guard: sync::Mutex::new(collections::HashMap::new()),
origins: sync::RwLock::new(collections::HashMap::new()),
2023-05-15 19:18:33 +00:00
}))
}
impl Store {
    // Directory which holds (or will hold) the clone for this descr.
    fn repo_path(&self, descr: &origin::Descr) -> PathBuf {
        self.dir_path.join(descr.id())
    }

    // File within a repo directory where the original Descr is serialized, so
    // that all_descrs can recover it later.
    fn descr_file_path(&self, descr_id: &str) -> PathBuf {
        self.dir_path.join(descr_id).join("descr.json")
    }

    // Remote-tracking ref name for the given branch, as produced by fetching
    // from the default remote ("origin").
    fn branch_ref(&self, branch_name: &str) -> String {
        format!("origin/{branch_name}")
    }

    // Constructs an Origin from an already-synced repo by resolving the
    // descr's branch to its HEAD commit's tree.
    //
    // Panics if descr is not a Descr::Git (see the comment on Store).
    fn get_origin(
        &self,
        repo: gix::Repository,
        descr: origin::Descr,
    ) -> Result<Origin, GetOriginError> {
        let origin::Descr::Git {
            ref branch_name, ..
        } = descr;

        let branch_ref = self.branch_ref(branch_name);

        // Resolve the branch ref to the commit it points at. A missing ref
        // means the branch name in the descr is bad.
        let commit_object_id = repo
            .try_find_reference(&branch_ref)
            .map_unexpected_while(|| format!("finding branch ref {branch_ref}"))?
            .ok_or(GetOriginError::InvalidBranchName)?
            .peel_to_id_in_place()
            .or_unexpected_while("peeling id in place")?
            .detach();

        // From the commit, pull out the id of its root tree; that's all
        // read_file_into needs to serve file reads.
        let tree_object_id = repo
            .find_object(commit_object_id)
            .map_unexpected_while(|| format!("finding commit object {commit_object_id}"))?
            .try_to_commit_ref()
            .map_unexpected_while(|| format!("parsing {commit_object_id} as commit"))?
            .tree();

        Ok(Origin {
            descr,
            // .into() converts the Repository into a ThreadSafeRepository so
            // the Origin can be shared across threads.
            repo: sync::Arc::new(repo.into()),
            tree_object_id,
        })
    }

    // Ensures the repo for descr exists on disk and is up-to-date with its
    // remote: clones it if the repo directory doesn't exist yet, otherwise
    // fetches from the default remote. Returns the opened repo.
    //
    // Panics if descr is not a Descr::Git (see the comment on Store).
    fn sync_inner(
        &self,
        descr: &origin::Descr,
        _limits: store::Limits,
    ) -> Result<gix::Repository, store::SyncError> {
        use gix::clone::Error as gixCloneErr;
        use gix::progress::Discard;

        // NOTE(review): interruption is never requested here; the flag exists
        // only because the gix fetch APIs require one.
        let should_interrupt = &core::sync::atomic::AtomicBool::new(false);

        let repo_path = &self.repo_path(descr);

        // if the path doesn't exist then use the gix clone feature to clone it into the
        // directory.
        if fs::read_dir(repo_path).is_err() {
            fs::create_dir_all(repo_path)
                .map_unexpected_while(|| format!("creating {}", repo_path.display()))?;

            let origin::Descr::Git {
                ref url,
                ref branch_name,
            } = descr;

            // Map the gix clone errors that indicate bad user input onto the
            // corresponding SyncError variants; everything else is unexpected.
            let (repo, _) = gix::prepare_clone_bare(url.clone(), repo_path)
                .map_err(|e| match e {
                    gixCloneErr::Init(gix::init::Error::InvalidBranchName { .. }) => {
                        store::SyncError::InvalidBranchName
                    }
                    gixCloneErr::UrlParse(_) | gixCloneErr::CanonicalizeUrl { .. } => {
                        store::SyncError::InvalidURL
                    }
                    _ => e
                        .into_unexpected_while(format!(
                            "cloning {} into {}",
                            url,
                            repo_path.display()
                        ))
                        .into(),
                })?
                // presumably a fetch failure here most often means an
                // unreachable/bad URL, hence the blanket InvalidURL mapping —
                // the original error detail is discarded.
                .fetch_only(Discard, should_interrupt)
                .map_err(|_| store::SyncError::InvalidURL)?;

            // Check to make sure the branch name exists
            // TODO if this fails we should delete repo_path
            let branch_ref = self.branch_ref(branch_name);
            repo.try_find_reference(&branch_ref)
                .map_unexpected_while(|| format!("finding branch ref {branch_ref}"))?
                .ok_or(store::SyncError::InvalidBranchName)?;

            // Add the descr to the repo directory, so we can know the actual descr later
            // TODO if this fails we should delete repo_path
            let file_path = self.descr_file_path(descr.id().as_ref());
            let descr_file = fs::File::create(&file_path)
                .map_unexpected_while(|| format!("creating {}", file_path.display()))?;
            serde_json::to_writer(descr_file, &descr)
                .map_unexpected_while(|| format!("writing descr to {}", file_path.display()))?;

            return Ok(repo);
        }

        // The repo already exists on disk; open it and fetch from its default
        // remote to pick up any new commits.
        let direction = gix::remote::Direction::Fetch;

        let repo = gix::open(repo_path)
            .map_unexpected_while(|| format!("opening repo at {}", repo_path.display()))?;

        let remote = repo
            .find_default_remote(direction)
            .ok_or_else(|| unexpected::Error::from("no default configured"))?
            .or_unexpected_while("finding default remote for fetching")?;

        remote
            .connect(direction)
            .or_unexpected_while("connecting to remote")?
            .prepare_fetch(Discard, Default::default())
            .or_unexpected_while("preparing fetch")?
            .receive(Discard, should_interrupt)
            .or_unexpected_while("fetching from remote")?;

        Ok(repo)
    }
}
impl super::Store for Store {
2023-05-11 09:54:25 +00:00
fn sync(&self, descr: origin::Descr, limits: store::Limits) -> Result<(), store::SyncError> {
// attempt to lock this descr for syncing, doing so within a new scope so the mutex
// isn't actually being held for the whole method duration.
let is_already_syncing = {
self.sync_guard
.lock()
.unwrap()
.insert(descr.clone(), ())
.is_some()
};
if is_already_syncing {
return Err(store::SyncError::AlreadyInProgress);
}
let res = self.sync_inner(&descr, limits);
self.sync_guard.lock().unwrap().remove(&descr);
let repo = match res {
Ok(repo) => repo,
Err(e) => return Err(e),
};
// repo is synced at this point (though the sync lock is still held), just gotta create
// the origin and store it.
//
// TODO this is a bit of a memory leak, but by the time we get
// to that point this should all be backed by something which isn't local storage
// anyway.
// calling this while the sync lock is held isn't ideal, but it's convenient and
// shouldn't be too terrible generally
let origin = self.get_origin(repo, descr.clone()).map_err(|e| match e {
GetOriginError::InvalidBranchName => store::SyncError::InvalidBranchName,
GetOriginError::Unexpected(e) => store::SyncError::Unexpected(e),
})?;
let mut origins = self.origins.write().unwrap();
(*origins).insert(descr, sync::Arc::new(origin));
2023-05-11 09:54:25 +00:00
Ok(())
}
fn get(&self, descr: origin::Descr) -> Result<sync::Arc<dyn origin::Origin>, store::GetError> {
2023-05-11 09:54:25 +00:00
{
let origins = self.origins.read().unwrap();
if let Some(origin) = origins.get(&descr) {
return Ok(origin.clone());
}
}
let repo_path = self.repo_path(&descr);
fs::read_dir(&repo_path).map_err(|e| match e.kind() {
io::ErrorKind::NotFound => store::GetError::NotFound,
_ => e
.into_unexpected_while(format!("checking if {} exists", repo_path.display()))
.into(),
2023-05-11 09:54:25 +00:00
})?;
let repo = gix::open(&repo_path)
.map_unexpected_while(|| format!("opening {} as git repo", repo_path.display()))?;
2023-05-11 09:54:25 +00:00
let origin = self.get_origin(repo, descr.clone()).map_err(|e| match e {
// it's not expected that the branch name is invalid at this point, it must have
// existed for sync to have been successful.
GetOriginError::InvalidBranchName => e.into_unexpected().into(),
2023-05-11 09:54:25 +00:00
GetOriginError::Unexpected(e) => store::GetError::Unexpected(e),
})?;
let origin = sync::Arc::new(origin.clone());
2023-05-11 09:54:25 +00:00
let mut origins = self.origins.write().unwrap();
2023-05-11 09:54:25 +00:00
(*origins).insert(descr, origin.clone());
Ok(origin)
}
fn all_descrs(&self) -> Result<Vec<origin::Descr>, store::AllDescrsError> {
fs::read_dir(&self.dir_path).or_unexpected()?.map(
|dir_entry_res: io::Result<fs::DirEntry>| -> Result<origin::Descr, store::AllDescrsError> {
let descr_id: String = dir_entry_res
.or_unexpected()?
.file_name()
.to_str()
.ok_or_else(|| {
unexpected::Error::from("couldn't convert os string to &str")
})?
.into();
let descr_file_path = self.descr_file_path(descr_id.as_ref());
// TODO it's possible that opening the file will fail if syncing is
// still ongoing, as writing the descr file is the last step after
// initial sync has succeeded.
let descr_file = fs::File::open(descr_file_path.as_path())
.map_unexpected_while(|| {
format!("opening descr file {}", descr_file_path.display())
})?;
let descr = serde_json::from_reader(descr_file).map_unexpected_while(|| {
format!("reading descr file {}", descr_file_path.display())
})?;
Ok(descr)
},
).try_collect()
2023-05-11 09:54:25 +00:00
}
}
#[cfg(test)]
mod tests {
    use crate::origin::store;
    use crate::origin::store::Store;
    use crate::origin::{self, Origin};
    use tempdir::TempDir;

    /// End-to-end exercise of the git Store against this crate's own repo:
    /// sync, get, read files, and list descrs.
    #[test]
    fn basic() {
        let tmp_dir = TempDir::new("origin_store_git").unwrap();
        // Use the current checkout as the remote, via a file:// URL.
        let curr_dir = format!("file://{}", std::env::current_dir().unwrap().display());

        let descr = origin::Descr::Git {
            url: curr_dir.clone(),
            branch_name: String::from("main"),
        };

        // A branch which doesn't exist, so it can never be synced/gotten.
        let other_descr = origin::Descr::Git {
            url: curr_dir.clone(),
            branch_name: String::from("some_other_branch"),
        };

        let limits = store::Limits {};

        let store = super::new(tmp_dir.path().to_path_buf()).expect("store created");

        store
            .sync(descr.clone(), limits)
            .expect("sync should succeed");

        // Syncing an already-synced descr takes the fetch path and should
        // also succeed.
        store
            .sync(descr.clone(), limits)
            .expect("second sync should succeed");

        assert!(matches!(
            store.get(other_descr),
            Err::<_, store::GetError>(store::GetError::NotFound),
        ));

        let origin = store.get(descr.clone()).expect("origin retrieved");
        assert_eq!(&descr, origin.descr());

        let assert_write = |path: &str| {
            let mut into: Vec<u8> = vec![];
            origin
                .read_file_into(path, &mut into)
                .expect("write should succeed");
            assert!(!into.is_empty());
        };

        // Paths should work both with and without a leading slash.
        assert_write("src/lib.rs");
        assert_write("/src/lib.rs");

        // File doesn't exist
        let mut into: Vec<u8> = vec![];
        assert!(matches!(
            origin.read_file_into("DNE", &mut into),
            Err::<(), origin::ReadFileIntoError>(origin::ReadFileIntoError::FileNotFound),
        ));
        assert_eq!(into.len(), 0);

        // all_descrs returns Result<Vec<Descr>, _>; the previous assertions
        // collected into Vec<Result<..>>, which didn't match that signature.
        let descrs = store.all_descrs().expect("all_descrs called");
        assert_eq!(1, descrs.len());
        assert_eq!(descr, descrs[0]);
    }
}