use crate::origin;
use crate::origin::store;
use std::error::Error;
use std::path::{Path, PathBuf};
use std::{collections, fs, io, sync};

/// An origin backed by a git repository, pinned to a single tree of that repository.
struct Origin {
    /// The description this origin was created from.
    descr: origin::Descr,

    /// Handle to the repository, in the form which can be converted into a thread-local handle
    /// whenever a file needs to be read.
    repo: gix::ThreadSafeRepository,

    /// Id of the tree in which files are looked up.
    tree_object_id: gix::ObjectId,
}

impl origin::Origin for sync::Arc<Origin> {
    /// Returns the description this origin was created from.
    fn descr(&self) -> &origin::Descr {
        &self.descr
    }

    /// Writes the contents of the file found at `path` in the origin's tree into `into`.
    ///
    /// A leading `/` on `path` is ignored, so `/src/lib.rs` and `src/lib.rs` address the same
    /// entry.
    ///
    /// # Errors
    ///
    /// * `ReadFileIntoError::FileNotFound` if the tree has no entry at `path`.
    /// * `ReadFileIntoError::Unexpected` for any other failure, including a failure to write to
    ///   `into`.
    fn read_file_into(
        &self,
        path: &str,
        into: &mut dyn std::io::Write,
    ) -> Result<(), origin::ReadFileIntoError> {
        // Entries are looked up relative to the root of the tree, so drop any leading slash.
        let given_path = Path::new(path);
        let clean_path = given_path.strip_prefix("/").unwrap_or(given_path);

        let repo = self.repo.to_thread_local();

        let file_object = repo
            .find_object(self.tree_object_id)
            .map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?
            .peel_to_tree()
            .map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?
            .lookup_entry_by_path(clean_path)
            .map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?
            .ok_or(origin::ReadFileIntoError::FileNotFound)?
            .object()
            .map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?;

        into.write_all(file_object.data.as_ref())
            .map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?;

        Ok(())
    }
}

/// Errors which can occur while constructing an [`Origin`] from an already opened repository.
#[derive(thiserror::Error, Debug)]
enum GetOriginError {
    /// The reference for the requested branch could not be found in the repository.
    #[error("invalid branch name")]
    InvalidBranchName,

    /// Any other failure.
    #[error(transparent)]
    Unexpected(Box<dyn Error>),
}

/// git::Store implements the Store trait for any Descr::Git based Origins. If any non-git
/// Descrs are used then this implementation will panic.
struct Store { dir_path: PathBuf, // to prevent against syncing the same origin more than once at a time, but still allowing // more than one origin to be syncing at a time sync_guard: sync::Mutex>, origins: sync::RwLock>>, } pub fn new(dir_path: PathBuf) -> io::Result { fs::create_dir_all(&dir_path)?; Ok(Store { dir_path, sync_guard: sync::Mutex::new(collections::HashMap::new()), origins: sync::RwLock::new(collections::HashMap::new()), }) } impl Store { fn repo_path(&self, descr: &origin::Descr) -> PathBuf { self.dir_path.join(descr.id()) } fn descr_file_path(&self, descr_id: &str) -> PathBuf { self.dir_path.join(descr_id).join("descr.json") } fn branch_ref(&self, branch_name: &str) -> String { format!("origin/{branch_name}") } fn get_origin( &self, repo: gix::Repository, descr: origin::Descr, ) -> Result, GetOriginError> { let origin::Descr::Git { ref branch_name, .. } = descr; let commit_object_id = repo .try_find_reference(&self.branch_ref(branch_name)) .map_err(|e| GetOriginError::Unexpected(Box::from(e)))? .ok_or(GetOriginError::InvalidBranchName)? .peel_to_id_in_place() .map_err(|e| GetOriginError::Unexpected(Box::from(e)))? .detach(); let tree_object_id = repo .find_object(commit_object_id) .map_err(|e| GetOriginError::Unexpected(Box::from(e)))? .try_to_commit_ref() .map_err(|e| GetOriginError::Unexpected(Box::from(e)))? .tree(); return Ok(sync::Arc::from(Origin { descr, repo: repo.into(), tree_object_id, })); } fn sync_inner( &self, descr: &origin::Descr, _limits: store::Limits, ) -> Result { use gix::clone::Error as gixCloneErr; use gix::progress::Discard; let should_interrupt = &core::sync::atomic::AtomicBool::new(false); let repo_path = &self.repo_path(&descr); // if the path doesn't exist then use the gix clone feature to clone it into the // directory. 
if fs::read_dir(repo_path).is_err() { fs::create_dir_all(repo_path) .map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; let origin::Descr::Git { ref url, ref branch_name, } = descr; let (repo, _) = gix::prepare_clone_bare(url.clone(), repo_path) .map_err(|e| match e { gixCloneErr::Init(gix::init::Error::InvalidBranchName { .. }) => { store::SyncError::InvalidBranchName } gixCloneErr::UrlParse(_) | gixCloneErr::CanonicalizeUrl { .. } => { store::SyncError::InvalidURL } _ => store::SyncError::Unexpected(Box::from(e)), })? .fetch_only(Discard, should_interrupt) .map_err(|_| store::SyncError::InvalidURL)?; // Check to make sure the branch name exists // TODO if this fails we should delete repo_path repo.try_find_reference(&self.branch_ref(branch_name)) .map_err(|e| store::SyncError::Unexpected(Box::from(e)))? .ok_or(store::SyncError::InvalidBranchName)?; // Add the descr to the repo directory, so we can know the actual descr later // TODO if this fails we should delete repo_path let descr_file = fs::File::create(self.descr_file_path(descr.id().as_ref())) .map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; serde_json::to_writer(descr_file, &descr) .map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; return Ok(repo); } let direction = gix::remote::Direction::Fetch; let repo = gix::open(repo_path).map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; let remote = repo .find_default_remote(direction) .ok_or_else(|| store::SyncError::Unexpected(Box::from("no default configured")))? .map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; remote .connect(direction) .map_err(|e| store::SyncError::Unexpected(Box::from(e)))? .prepare_fetch(Discard, Default::default()) .map_err(|e| store::SyncError::Unexpected(Box::from(e)))? 
.receive(Discard, should_interrupt) .map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; Ok(repo) } } impl super::Store for Store { type Origin<'store> = sync::Arc where Self: 'store; type AllDescrsIter<'store> = Box> + 'store> where Self: 'store; fn sync(&self, descr: origin::Descr, limits: store::Limits) -> Result<(), store::SyncError> { // attempt to lock this descr for syncing, doing so within a new scope so the mutex // isn't actually being held for the whole method duration. let is_already_syncing = { self.sync_guard .lock() .unwrap() .insert(descr.clone(), ()) .is_some() }; if is_already_syncing { return Err(store::SyncError::AlreadyInProgress); } let res = self.sync_inner(&descr, limits); self.sync_guard.lock().unwrap().remove(&descr); let repo = match res { Ok(repo) => repo, Err(e) => return Err(e), }; // repo is synced at this point (though the sync lock is still held), just gotta create // the origin and store it. // // TODO this is a bit of a memory leak, but by the time we get // to that point this should all be backed by something which isn't local storage // anyway. 
// calling this while the sync lock is held isn't ideal, but it's convenient and // shouldn't be too terrible generally let origin = self.get_origin(repo, descr.clone()).map_err(|e| match e { GetOriginError::InvalidBranchName => store::SyncError::InvalidBranchName, GetOriginError::Unexpected(e) => store::SyncError::Unexpected(e), })?; let mut origins = self.origins.write().unwrap(); (*origins).insert(descr, origin); Ok(()) } fn get(&self, descr: origin::Descr) -> Result, store::GetError> { { let origins = self.origins.read().unwrap(); if let Some(origin) = origins.get(&descr) { return Ok(origin.clone()); } } let repo_path = self.repo_path(&descr); fs::read_dir(&repo_path).map_err(|e| match e.kind() { io::ErrorKind::NotFound => store::GetError::NotFound, _ => store::GetError::Unexpected(Box::from(e)), })?; let repo = gix::open(&repo_path).map_err(|e| store::GetError::Unexpected(Box::from(e)))?; let origin = self.get_origin(repo, descr.clone()).map_err(|e| match e { // it's not expected that the branch name is invalid at this point, it must have // existed for sync to have been successful. GetOriginError::InvalidBranchName => store::GetError::Unexpected(Box::from(e)), GetOriginError::Unexpected(e) => store::GetError::Unexpected(e), })?; let mut origins = self.origins.write().unwrap(); (*origins).insert(descr, origin.clone()); Ok(origin) } fn all_descrs(&self) -> store::AllDescrsResult> { Ok(Box::from( fs::read_dir(&self.dir_path) .map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))? .map( |dir_entry_res: io::Result| -> store::AllDescrsResult { let descr_id: String = dir_entry_res .map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))? .file_name() .to_str() .ok_or_else(|| { store::AllDescrsError::Unexpected(Box::from( "couldn't convert os string to &str", )) })? 
.into(); let descr_file_path = self.descr_file_path(descr_id.as_ref()); // TODO it's possible that opening the file will fail if syncing is // still ongoing, as writing the descr file is the last step after // initial sync has succeeded. let descr_file = fs::File::open(descr_file_path) .map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))?; let descr = serde_json::from_reader(descr_file) .map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))?; Ok(descr) }, ), )) } } #[cfg(test)] mod tests { use crate::origin::store; use crate::origin::store::Store; use crate::origin::{self, Origin}; use tempdir::TempDir; #[test] fn basic() { let tmp_dir = TempDir::new("origin_store_git").unwrap(); let curr_dir = format!("file://{}", std::env::current_dir().unwrap().display()); let descr = origin::Descr::Git { url: curr_dir.clone(), branch_name: String::from("master"), }; let other_descr = origin::Descr::Git { url: curr_dir.clone(), branch_name: String::from("some_other_branch"), }; let limits = store::Limits {}; let store = super::new(tmp_dir.path().to_path_buf()).expect("store created"); store .sync(descr.clone(), limits) .expect("sync should succeed"); store .sync(descr.clone(), limits) .expect("second sync should succeed"); assert!(matches!( store.get(other_descr), Err::<_, store::GetError>(store::GetError::NotFound), )); let origin = store.get(descr.clone()).expect("origin retrieved"); assert_eq!(&descr, origin.descr()); let assert_write = |path: &str| { let mut into: Vec = vec![]; origin .read_file_into(path, &mut into) .expect("write should succeed"); assert!(into.len() > 0); }; assert_write("src/lib.rs"); assert_write("/src/lib.rs"); // File doesn't exist let mut into: Vec = vec![]; assert!(matches!( origin.read_file_into("DNE", &mut into), Err::<(), origin::ReadFileIntoError>(origin::ReadFileIntoError::FileNotFound), )); assert_eq!(into.len(), 0); let descrs = store .all_descrs() .expect("all_descrs called") .into_iter() .collect::>>(); assert_eq!(1, 
descrs.len()); assert_eq!(&descr, descrs[0].as_ref().unwrap()); } }