use crate::error::unexpected::{self, Intoable, Mappable};
use crate::origin::{self, store};
use std::path::{Path, PathBuf};
use std::{collections, fs, io, sync};

#[derive(Clone)]
struct Origin {
    descr: origin::Descr,
    repo: sync::Arc<gix::ThreadSafeRepository>,
    tree_object_id: gix::ObjectId,
}

impl origin::Origin for Origin {
    fn descr(&self) -> &origin::Descr {
        &self.descr
    }

    fn read_file_into(
        &self,
        path: &str,
        into: &mut dyn std::io::Write,
    ) -> Result<(), origin::ReadFileIntoError> {
        let mut clean_path = Path::new(path);
        clean_path = clean_path.strip_prefix("/").unwrap_or(clean_path);

        let repo = self.repo.to_thread_local();

        let file_object = repo
            .find_object(self.tree_object_id)
            .map_unexpected_while(|| format!("finding tree object {}", self.tree_object_id))?
            .peel_to_tree()
            .map_unexpected_while(|| format!("peeling tree object {}", self.tree_object_id))?
            .lookup_entry_by_path(clean_path)
            .map_unexpected_while(|| {
                format!(
                    "looking up {} in tree object {}",
                    clean_path.display(),
                    self.tree_object_id
                )
            })?
            .ok_or(origin::ReadFileIntoError::FileNotFound)?
            .object()
            .or_unexpected()?;

        into.write_all(file_object.data.as_ref())
            .or_unexpected_while("copying out file")?;

        Ok(())
    }
}

#[derive(thiserror::Error, Debug)]
enum GetOriginError {
    #[error("invalid branch name")]
    InvalidBranchName,

    #[error(transparent)]
    Unexpected(#[from] unexpected::Error),
}

/// git::Store implements the Store trait for any Descr::Git based Origins. If any non-git
/// Descrs are used then this implementation will panic.
struct Store {
    dir_path: PathBuf,

    // to prevent against syncing the same origin more than once at a time, while still allowing
    // more than one origin to be syncing at a time
    sync_guard: sync::Mutex<collections::HashMap<origin::Descr, ()>>,

    origins: sync::RwLock<collections::HashMap<origin::Descr, sync::Arc<Origin>>>,
}

pub fn new(dir_path: PathBuf) -> io::Result<sync::Arc<Store>> {
    fs::create_dir_all(&dir_path)?;
    Ok(sync::Arc::new(Store {
        dir_path,
        sync_guard: sync::Mutex::new(collections::HashMap::new()),
        origins: sync::RwLock::new(collections::HashMap::new()),
    }))
}

impl Store {
    fn repo_path(&self, descr: &origin::Descr) -> PathBuf {
        self.dir_path.join(descr.id())
    }

    fn descr_file_path(&self, descr_id: &str) -> PathBuf {
        self.dir_path.join(descr_id).join("descr.json")
    }

    fn branch_ref(&self, branch_name: &str) -> String {
        format!("origin/{branch_name}")
    }

    fn get_origin(
        &self,
        repo: gix::Repository,
        descr: origin::Descr,
    ) -> Result<Origin, GetOriginError> {
        let origin::Descr::Git {
            ref branch_name, ..
        } = descr;

        let branch_ref = self.branch_ref(branch_name);

        let commit_object_id = repo
            .try_find_reference(&branch_ref)
            .map_unexpected_while(|| format!("finding branch ref {branch_ref}"))?
            .ok_or(GetOriginError::InvalidBranchName)?
            .peel_to_id_in_place()
            .or_unexpected_while("peeling id in place")?
            .detach();

        let tree_object_id = repo
            .find_object(commit_object_id)
            .map_unexpected_while(|| format!("finding commit object {commit_object_id}"))?
            .try_to_commit_ref()
            .map_unexpected_while(|| format!("parsing {commit_object_id} as commit"))?
            .tree();

        Ok(Origin {
            descr,
            repo: sync::Arc::new(repo.into()),
            tree_object_id,
        })
    }

    fn sync_inner(
        &self,
        descr: &origin::Descr,
        _limits: store::Limits,
    ) -> Result<gix::Repository, store::SyncError> {
        use gix::clone::Error as gixCloneErr;
        use gix::progress::Discard;

        let should_interrupt = &core::sync::atomic::AtomicBool::new(false);
        let repo_path = &self.repo_path(descr);

        // if the path doesn't exist then use the gix clone feature to clone it into the
        // directory.
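        //
        // NOTE read_dir is used as an existence check here: any error, not just NotFound, sends
        // us down the fresh-clone path, which creates a bare repo at repo_path and writes the
        // Descr to a descr.json file inside it so that all_descrs can recover it later.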
        if fs::read_dir(repo_path).is_err() {
            fs::create_dir_all(repo_path)
                .map_unexpected_while(|| format!("creating {}", repo_path.display()))?;

            let origin::Descr::Git {
                ref url,
                ref branch_name,
            } = descr;

            let (repo, _) = gix::prepare_clone_bare(url.clone(), repo_path)
                .map_err(|e| match e {
                    gixCloneErr::Init(gix::init::Error::InvalidBranchName { .. }) => {
                        store::SyncError::InvalidBranchName
                    }
                    gixCloneErr::UrlParse(_) | gixCloneErr::CanonicalizeUrl { .. } => {
                        store::SyncError::InvalidURL
                    }
                    _ => e
                        .into_unexpected_while(format!(
                            "cloning {} into {}",
                            url,
                            repo_path.display()
                        ))
                        .into(),
                })?
                .fetch_only(Discard, should_interrupt)
                .map_err(|_| store::SyncError::InvalidURL)?;

            // Check to make sure the branch name exists
            // TODO if this fails we should delete repo_path
            let branch_ref = self.branch_ref(branch_name);
            repo.try_find_reference(&branch_ref)
                .map_unexpected_while(|| format!("finding branch ref {branch_ref}"))?
                .ok_or(store::SyncError::InvalidBranchName)?;

            // Add the descr to the repo directory, so we can know the actual descr later
            // TODO if this fails we should delete repo_path
            let file_path = self.descr_file_path(descr.id().as_ref());
            let descr_file = fs::File::create(&file_path)
                .map_unexpected_while(|| format!("creating {}", file_path.display()))?;
            serde_json::to_writer(descr_file, &descr)
                .map_unexpected_while(|| format!("writing descr to {}", file_path.display()))?;

            return Ok(repo);
        }

        let direction = gix::remote::Direction::Fetch;

        let repo = gix::open(repo_path)
            .map_unexpected_while(|| format!("opening repo at {}", repo_path.display()))?;

        let remote = repo
            .find_default_remote(direction)
            .ok_or_else(|| unexpected::Error::from("no default configured"))?
            .or_unexpected_while("finding default remote for fetching")?;

        remote
            .connect(direction)
            .or_unexpected_while("connecting to remote")?
            .prepare_fetch(Discard, Default::default())
            .or_unexpected_while("preparing fetch")?
            .receive(Discard, should_interrupt)
            .or_unexpected_while("fetching from remote")?;

        Ok(repo)
    }
}

impl super::Store for Store {
    fn sync(&self, descr: origin::Descr, limits: store::Limits) -> Result<(), store::SyncError> {
        // attempt to lock this descr for syncing, doing so within a new scope so the mutex
        // isn't actually being held for the whole method duration.
        let is_already_syncing = {
            self.sync_guard
                .lock()
                .unwrap()
                .insert(descr.clone(), ())
                .is_some()
        };

        if is_already_syncing {
            return Err(store::SyncError::AlreadyInProgress);
        }

        let res = self.sync_inner(&descr, limits);

        self.sync_guard.lock().unwrap().remove(&descr);

        let repo = match res {
            Ok(repo) => repo,
            Err(e) => return Err(e),
        };

        // repo is synced at this point, just gotta create the origin and store it.
        //
        // TODO this is a bit of a memory leak, but by the time we get to that point this should
        // all be backed by something which isn't local storage anyway.
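        // NOTE the sync guard entry for this descr has already been removed above, so a second
        // sync of the same descr could start while the origin below is being constructed and
        // inserted; origins is only touched under its write lock, so the worst case is redundant
        // work.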
        // calling this after the sync guard has been released isn't ideal, but it's convenient
        // and shouldn't be too terrible generally
        let origin = self.get_origin(repo, descr.clone()).map_err(|e| match e {
            GetOriginError::InvalidBranchName => store::SyncError::InvalidBranchName,
            GetOriginError::Unexpected(e) => store::SyncError::Unexpected(e),
        })?;

        let mut origins = self.origins.write().unwrap();
        (*origins).insert(descr, sync::Arc::new(origin));

        Ok(())
    }

    fn get(&self, descr: origin::Descr) -> Result<sync::Arc<dyn origin::Origin>, store::GetError> {
        {
            let origins = self.origins.read().unwrap();
            if let Some(origin) = origins.get(&descr) {
                return Ok(origin.clone());
            }
        }

        let repo_path = self.repo_path(&descr);

        fs::read_dir(&repo_path).map_err(|e| match e.kind() {
            io::ErrorKind::NotFound => store::GetError::NotFound,
            _ => e
                .into_unexpected_while(format!("checking if {} exists", repo_path.display()))
                .into(),
        })?;

        let repo = gix::open(&repo_path)
            .map_unexpected_while(|| format!("opening {} as git repo", repo_path.display()))?;

        let origin = self.get_origin(repo, descr.clone()).map_err(|e| match e {
            // it's not expected that the branch name is invalid at this point, it must have
            // existed for sync to have been successful.
            GetOriginError::InvalidBranchName => e.into_unexpected().into(),
            GetOriginError::Unexpected(e) => store::GetError::Unexpected(e),
        })?;

        let origin = sync::Arc::new(origin.clone());

        let mut origins = self.origins.write().unwrap();
        (*origins).insert(descr, origin.clone());

        Ok(origin)
    }

    fn all_descrs(&self) -> Result<Vec<origin::Descr>, store::AllDescrsError> {
        fs::read_dir(&self.dir_path)
            .or_unexpected()?
            .map(
                |dir_entry_res: io::Result<fs::DirEntry>| -> Result<origin::Descr, store::AllDescrsError> {
                    let descr_id: String = dir_entry_res
                        .or_unexpected()?
                        .file_name()
                        .to_str()
                        .ok_or_else(|| {
                            unexpected::Error::from("couldn't convert os string to &str")
                        })?
                        .into();

                    let descr_file_path = self.descr_file_path(descr_id.as_ref());

                    // TODO it's possible that opening the file will fail if syncing is
                    // still ongoing, as writing the descr file is the last step after
                    // initial sync has succeeded.
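                    // If that happens the error is propagated and the whole all_descrs call
                    // fails, rather than the entry simply being skipped.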
                    let descr_file = fs::File::open(descr_file_path.as_path())
                        .map_unexpected_while(|| {
                            format!("opening descr file {}", descr_file_path.display())
                        })?;

                    let descr = serde_json::from_reader(descr_file).map_unexpected_while(|| {
                        format!("reading descr file {}", descr_file_path.display())
                    })?;

                    Ok(descr)
                },
            )
            .try_collect()
    }
}

#[cfg(test)]
mod tests {
    use crate::origin::store;
    use crate::origin::store::Store;
    use crate::origin::{self, Origin};
    use tempdir::TempDir;

    #[test]
    fn basic() {
        let tmp_dir = TempDir::new("origin_store_git").unwrap();
        let curr_dir = format!("file://{}", std::env::current_dir().unwrap().display());

        let descr = origin::Descr::Git {
            url: curr_dir.clone(),
            branch_name: String::from("main"),
        };

        let other_descr = origin::Descr::Git {
            url: curr_dir.clone(),
            branch_name: String::from("some_other_branch"),
        };

        let limits = store::Limits {};

        let store = super::new(tmp_dir.path().to_path_buf()).expect("store created");

        store
            .sync(descr.clone(), limits)
            .expect("sync should succeed");

        store
            .sync(descr.clone(), limits)
            .expect("second sync should succeed");

        assert!(matches!(
            store.get(other_descr),
            Err::<_, store::GetError>(store::GetError::NotFound),
        ));

        let origin = store.get(descr.clone()).expect("origin retrieved");

        assert_eq!(&descr, origin.descr());

        let assert_write = |path: &str| {
            let mut into: Vec<u8> = vec![];
            origin
                .read_file_into(path, &mut into)
                .expect("write should succeed");
            assert!(into.len() > 0);
        };

        assert_write("src/lib.rs");
        assert_write("/src/lib.rs");

        // File doesn't exist
        let mut into: Vec<u8> = vec![];
        assert!(matches!(
            origin.read_file_into("DNE", &mut into),
            Err::<(), origin::ReadFileIntoError>(origin::ReadFileIntoError::FileNotFound),
        ));
        assert_eq!(into.len(), 0);

        let descrs = store
            .all_descrs()
            .expect("all_descrs called")
            .into_iter()
            .collect::<Vec<origin::Descr>>();

        assert_eq!(1, descrs.len());
        assert_eq!(descr, descrs[0]);
    }
}