From 796b90e91d528f5659e03395366e820e9858b849 Mon Sep 17 00:00:00 2001 From: Brian Picciano Date: Thu, 11 May 2023 11:54:25 +0200 Subject: [PATCH] move git store to its own file --- src/origin/store.rs | 393 +--------------------------------------- src/origin/store/git.rs | 381 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 383 insertions(+), 391 deletions(-) create mode 100644 src/origin/store/git.rs diff --git a/src/origin/store.rs b/src/origin/store.rs index 23a4ee4..23e3e5b 100644 --- a/src/origin/store.rs +++ b/src/origin/store.rs @@ -1,6 +1,8 @@ use crate::origin; use std::error::Error; +pub mod git; + #[derive(Clone, Copy)] pub struct Limits { // TODO storage limits @@ -60,394 +62,3 @@ pub trait Store { fn get(&self, descr: origin::Descr) -> Result, GetError>; fn all_descrs(&self) -> AllDescrsResult>; } - -pub mod git { - - use crate::origin; - use crate::origin::store; - - use std::error::Error; - use std::path::{Path, PathBuf}; - use std::{collections, fs, io, sync}; - - pub struct Origin { - descr: origin::Descr, - repo: gix::ThreadSafeRepository, - tree_object_id: gix::ObjectId, - } - - impl origin::Origin for sync::Arc { - fn descr(&self) -> &origin::Descr { - &self.descr - } - - fn read_file_into( - &self, - path: &str, - into: &mut dyn std::io::Write, - ) -> Result<(), origin::ReadFileIntoError> { - let mut clean_path = Path::new(path); - clean_path = clean_path.strip_prefix("/").unwrap_or(clean_path); - - let repo = self.repo.to_thread_local(); - - let file_object = repo - .find_object(self.tree_object_id) - .map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))? - .peel_to_tree() - .map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))? - .lookup_entry_by_path(clean_path) - .map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))? - .ok_or(origin::ReadFileIntoError::FileNotFound)? - .object() - .map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?; - - into.write_all(file_object.data.as_ref()) - .map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?; - - Ok(()) - } - } - - #[derive(thiserror::Error, Debug)] - enum GetOriginError { - #[error("invalid branch name")] - InvalidBranchName, - - #[error(transparent)] - Unexpected(Box), - } - - /// git::Store implements the Store trait for any Descr::Git based Origins. If any non-git - /// Descrs are used then this implementation will panic. - pub struct Store { - dir_path: PathBuf, - - // to prevent against syncing the same origin more than once at a time, but still allowing - // more than one origin to be syncing at a time - sync_guard: sync::Mutex>, - - origins: sync::RwLock>>, - } - - impl Store { - pub fn new(dir_path: PathBuf) -> io::Result { - fs::create_dir_all(&dir_path)?; - Ok(Store { - dir_path, - sync_guard: sync::Mutex::new(collections::HashMap::new()), - origins: sync::RwLock::new(collections::HashMap::new()), - }) - } - - fn repo_path(&self, descr: &origin::Descr) -> PathBuf { - self.dir_path.join(descr.id()) - } - - fn descr_file_path(&self, descr_id: &str) -> PathBuf { - self.dir_path.join(descr_id).join("descr.json") - } - - fn branch_ref(&self, branch_name: &str) -> String { - format!("origin/{branch_name}") - } - - fn get_origin( - &self, - repo: gix::Repository, - descr: origin::Descr, - ) -> Result, GetOriginError> { - let origin::Descr::Git { - ref branch_name, .. - } = descr; - - let commit_object_id = repo - .try_find_reference(&self.branch_ref(branch_name)) - .map_err(|e| GetOriginError::Unexpected(Box::from(e)))? - .ok_or(GetOriginError::InvalidBranchName)? - .peel_to_id_in_place() - .map_err(|e| GetOriginError::Unexpected(Box::from(e)))? - .detach(); - - let tree_object_id = repo - .find_object(commit_object_id) - .map_err(|e| GetOriginError::Unexpected(Box::from(e)))? - .try_to_commit_ref() - .map_err(|e| GetOriginError::Unexpected(Box::from(e)))? - .tree(); - - return Ok(sync::Arc::from(Origin { - descr, - repo: repo.into(), - tree_object_id, - })); - } - - fn sync_inner( - &self, - descr: &origin::Descr, - _limits: store::Limits, - ) -> Result { - use gix::clone::Error as gixCloneErr; - use gix::progress::Discard; - - let should_interrupt = &core::sync::atomic::AtomicBool::new(false); - let repo_path = &self.repo_path(&descr); - - // if the path doesn't exist then use the gix clone feature to clone it into the - // directory. - if fs::read_dir(repo_path).is_err() { - fs::create_dir_all(repo_path) - .map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; - - let origin::Descr::Git { - ref url, - ref branch_name, - } = descr; - - let (repo, _) = gix::prepare_clone_bare(url.clone(), repo_path) - .map_err(|e| match e { - gixCloneErr::Init(gix::init::Error::InvalidBranchName { .. }) => { - store::SyncError::InvalidBranchName - } - gixCloneErr::UrlParse(_) | gixCloneErr::CanonicalizeUrl { .. } => { - store::SyncError::InvalidURL - } - _ => store::SyncError::Unexpected(Box::from(e)), - })? - .fetch_only(Discard, should_interrupt) - .map_err(|_| store::SyncError::InvalidURL)?; - - // Check to make sure the branch name exists - // TODO if this fails we should delete repo_path - repo.try_find_reference(&self.branch_ref(branch_name)) - .map_err(|e| store::SyncError::Unexpected(Box::from(e)))? - .ok_or(store::SyncError::InvalidBranchName)?; - - // Add the descr to the repo directory, so we can know the actual descr later - // TODO if this fails we should delete repo_path - let descr_file = fs::File::create(self.descr_file_path(descr.id().as_ref())) - .map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; - - serde_json::to_writer(descr_file, &descr) - .map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; - - return Ok(repo); - } - - let direction = gix::remote::Direction::Fetch; - - let repo = - gix::open(repo_path).map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; - - let remote = repo - .find_default_remote(direction) - .ok_or_else(|| store::SyncError::Unexpected(Box::from("no default configured")))? - .map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; - - remote - .connect(direction) - .map_err(|e| store::SyncError::Unexpected(Box::from(e)))? - .prepare_fetch(Discard, Default::default()) - .map_err(|e| store::SyncError::Unexpected(Box::from(e)))? - .receive(Discard, should_interrupt) - .map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; - - Ok(repo) - } - } - - impl super::Store for Store { - type Origin<'store> = sync::Arc - where Self: 'store; - - type AllDescrsIter<'store> = Box> + 'store> - where Self: 'store; - - fn sync( - &self, - descr: origin::Descr, - limits: store::Limits, - ) -> Result<(), store::SyncError> { - // attempt to lock this descr for syncing, doing so within a new scope so the mutex - // isn't actually being held for the whole method duration. - let is_already_syncing = { - self.sync_guard - .lock() - .unwrap() - .insert(descr.clone(), ()) - .is_some() - }; - - if is_already_syncing { - return Err(store::SyncError::AlreadyInProgress); - } - - let res = self.sync_inner(&descr, limits); - - self.sync_guard.lock().unwrap().remove(&descr); - - let repo = match res { - Ok(repo) => repo, - Err(e) => return Err(e), - }; - - // repo is synced at this point (though the sync lock is still held), just gotta create - // the origin and store it. - // - // TODO this is a bit of a memory leak, but by the time we get - // to that point this should all be backed by something which isn't local storage - // anyway. - - // calling this while the sync lock is held isn't ideal, but it's convenient and - // shouldn't be too terrible generally - let origin = self.get_origin(repo, descr.clone()).map_err(|e| match e { - GetOriginError::InvalidBranchName => store::SyncError::InvalidBranchName, - GetOriginError::Unexpected(e) => store::SyncError::Unexpected(e), - })?; - - let mut origins = self.origins.write().unwrap(); - (*origins).insert(descr, origin); - - Ok(()) - } - - fn get(&self, descr: origin::Descr) -> Result, store::GetError> { - { - let origins = self.origins.read().unwrap(); - if let Some(origin) = origins.get(&descr) { - return Ok(origin.clone()); - } - } - - let repo_path = self.repo_path(&descr); - - fs::read_dir(&repo_path).map_err(|e| match e.kind() { - io::ErrorKind::NotFound => store::GetError::NotFound, - _ => store::GetError::Unexpected(Box::from(e)), - })?; - - let repo = - gix::open(&repo_path).map_err(|e| store::GetError::Unexpected(Box::from(e)))?; - - let origin = self.get_origin(repo, descr.clone()).map_err(|e| match e { - // it's not expected that the branch name is invalid at this point, it must have - // existed for sync to have been successful. - GetOriginError::InvalidBranchName => store::GetError::Unexpected(Box::from(e)), - GetOriginError::Unexpected(e) => store::GetError::Unexpected(e), - })?; - - let mut origins = self.origins.write().unwrap(); - (*origins).insert(descr, origin.clone()); - - Ok(origin) - } - - fn all_descrs(&self) -> store::AllDescrsResult> { - Ok(Box::from( - fs::read_dir(&self.dir_path) - .map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))? - .map( - |dir_entry_res: io::Result| -> store::AllDescrsResult { - let descr_id: String = dir_entry_res - .map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))? - .file_name() - .to_str() - .ok_or_else(|| { - store::AllDescrsError::Unexpected(Box::from( - "couldn't convert os string to &str", - )) - })? - .into(); - - let descr_file_path = self.descr_file_path(descr_id.as_ref()); - - // TODO it's possible that opening the file will fail if syncing is - // still ongoing, as writing the descr file is the last step after - // initial sync has succeeded. - let descr_file = fs::File::open(descr_file_path) - .map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))?; - - let descr = serde_json::from_reader(descr_file) - .map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))?; - - Ok(descr) - }, - ), - )) - } - } - - #[cfg(test)] - mod tests { - use crate::origin::store; - use crate::origin::store::Store; - use crate::origin::{self, Origin}; - use std::sync; - use tempdir::TempDir; - - #[test] - fn basic() { - let tmp_dir = TempDir::new("origin_store_git").unwrap(); - - let curr_dir = format!("file://{}", std::env::current_dir().unwrap().display()); - - let descr = origin::Descr::Git { - url: curr_dir.clone(), - branch_name: String::from("master"), - }; - - let other_descr = origin::Descr::Git { - url: curr_dir.clone(), - branch_name: String::from("some_other_branch"), - }; - - let limits = store::Limits {}; - - let store = super::Store::new(tmp_dir.path().to_path_buf()).expect("store created"); - - store - .sync(descr.clone(), limits) - .expect("sync should succeed"); - store - .sync(descr.clone(), limits) - .expect("second sync should succeed"); - - assert!(matches!( - store.get(other_descr), - Err::, store::GetError>(store::GetError::NotFound), - )); - - let origin = store.get(descr.clone()).expect("origin retrieved"); - - assert_eq!(&descr, origin.descr()); - - let assert_write = |path: &str| { - let mut into: Vec = vec![]; - origin - .read_file_into(path, &mut into) - .expect("write should succeed"); - assert!(into.len() > 0); - }; - - assert_write("src/lib.rs"); - assert_write("/src/lib.rs"); - - // File doesn't exist - let mut into: Vec = vec![]; - assert!(matches!( - origin.read_file_into("DNE", &mut into), - Err::<(), origin::ReadFileIntoError>(origin::ReadFileIntoError::FileNotFound), - )); - assert_eq!(into.len(), 0); - - let descrs = store - .all_descrs() - .expect("all_descrs callsed") - .collect::>>(); - - assert_eq!(1, descrs.len()); - assert_eq!(&descr, descrs[0].as_ref().unwrap()); - } - } -} diff --git a/src/origin/store/git.rs b/src/origin/store/git.rs new file mode 100644 index 0000000..ee05e06 --- /dev/null +++ b/src/origin/store/git.rs @@ -0,0 +1,381 @@ +use crate::origin; +use crate::origin::store; + +use std::error::Error; +use std::path::{Path, PathBuf}; +use std::{collections, fs, io, sync}; + +pub struct Origin { + descr: origin::Descr, + repo: gix::ThreadSafeRepository, + tree_object_id: gix::ObjectId, +} + +impl origin::Origin for sync::Arc { + fn descr(&self) -> &origin::Descr { + &self.descr + } + + fn read_file_into( + &self, + path: &str, + into: &mut dyn std::io::Write, + ) -> Result<(), origin::ReadFileIntoError> { + let mut clean_path = Path::new(path); + clean_path = clean_path.strip_prefix("/").unwrap_or(clean_path); + + let repo = self.repo.to_thread_local(); + + let file_object = repo + .find_object(self.tree_object_id) + .map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))? + .peel_to_tree() + .map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))? + .lookup_entry_by_path(clean_path) + .map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))? + .ok_or(origin::ReadFileIntoError::FileNotFound)? + .object() + .map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?; + + into.write_all(file_object.data.as_ref()) + .map_err(|e| origin::ReadFileIntoError::Unexpected(Box::from(e)))?; + + Ok(()) + } +} + +#[derive(thiserror::Error, Debug)] +enum GetOriginError { + #[error("invalid branch name")] + InvalidBranchName, + + #[error(transparent)] + Unexpected(Box), +} + +/// git::Store implements the Store trait for any Descr::Git based Origins. If any non-git +/// Descrs are used then this implementation will panic. +pub struct Store { + dir_path: PathBuf, + + // to prevent against syncing the same origin more than once at a time, but still allowing + // more than one origin to be syncing at a time + sync_guard: sync::Mutex>, + + origins: sync::RwLock>>, +} + +impl Store { + pub fn new(dir_path: PathBuf) -> io::Result { + fs::create_dir_all(&dir_path)?; + Ok(Store { + dir_path, + sync_guard: sync::Mutex::new(collections::HashMap::new()), + origins: sync::RwLock::new(collections::HashMap::new()), + }) + } + + fn repo_path(&self, descr: &origin::Descr) -> PathBuf { + self.dir_path.join(descr.id()) + } + + fn descr_file_path(&self, descr_id: &str) -> PathBuf { + self.dir_path.join(descr_id).join("descr.json") + } + + fn branch_ref(&self, branch_name: &str) -> String { + format!("origin/{branch_name}") + } + + fn get_origin( + &self, + repo: gix::Repository, + descr: origin::Descr, + ) -> Result, GetOriginError> { + let origin::Descr::Git { + ref branch_name, .. + } = descr; + + let commit_object_id = repo + .try_find_reference(&self.branch_ref(branch_name)) + .map_err(|e| GetOriginError::Unexpected(Box::from(e)))? + .ok_or(GetOriginError::InvalidBranchName)? + .peel_to_id_in_place() + .map_err(|e| GetOriginError::Unexpected(Box::from(e)))? + .detach(); + + let tree_object_id = repo + .find_object(commit_object_id) + .map_err(|e| GetOriginError::Unexpected(Box::from(e)))? + .try_to_commit_ref() + .map_err(|e| GetOriginError::Unexpected(Box::from(e)))? + .tree(); + + return Ok(sync::Arc::from(Origin { + descr, + repo: repo.into(), + tree_object_id, + })); + } + + fn sync_inner( + &self, + descr: &origin::Descr, + _limits: store::Limits, + ) -> Result { + use gix::clone::Error as gixCloneErr; + use gix::progress::Discard; + + let should_interrupt = &core::sync::atomic::AtomicBool::new(false); + let repo_path = &self.repo_path(&descr); + + // if the path doesn't exist then use the gix clone feature to clone it into the + // directory. + if fs::read_dir(repo_path).is_err() { + fs::create_dir_all(repo_path) + .map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; + + let origin::Descr::Git { + ref url, + ref branch_name, + } = descr; + + let (repo, _) = gix::prepare_clone_bare(url.clone(), repo_path) + .map_err(|e| match e { + gixCloneErr::Init(gix::init::Error::InvalidBranchName { .. }) => { + store::SyncError::InvalidBranchName + } + gixCloneErr::UrlParse(_) | gixCloneErr::CanonicalizeUrl { .. } => { + store::SyncError::InvalidURL + } + _ => store::SyncError::Unexpected(Box::from(e)), + })? + .fetch_only(Discard, should_interrupt) + .map_err(|_| store::SyncError::InvalidURL)?; + + // Check to make sure the branch name exists + // TODO if this fails we should delete repo_path + repo.try_find_reference(&self.branch_ref(branch_name)) + .map_err(|e| store::SyncError::Unexpected(Box::from(e)))? + .ok_or(store::SyncError::InvalidBranchName)?; + + // Add the descr to the repo directory, so we can know the actual descr later + // TODO if this fails we should delete repo_path + let descr_file = fs::File::create(self.descr_file_path(descr.id().as_ref())) + .map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; + + serde_json::to_writer(descr_file, &descr) + .map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; + + return Ok(repo); + } + + let direction = gix::remote::Direction::Fetch; + + let repo = gix::open(repo_path).map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; + + let remote = repo + .find_default_remote(direction) + .ok_or_else(|| store::SyncError::Unexpected(Box::from("no default configured")))? + .map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; + + remote + .connect(direction) + .map_err(|e| store::SyncError::Unexpected(Box::from(e)))? + .prepare_fetch(Discard, Default::default()) + .map_err(|e| store::SyncError::Unexpected(Box::from(e)))? + .receive(Discard, should_interrupt) + .map_err(|e| store::SyncError::Unexpected(Box::from(e)))?; + + Ok(repo) + } +} + +impl super::Store for Store { + type Origin<'store> = sync::Arc + where Self: 'store; + + type AllDescrsIter<'store> = Box> + 'store> + where Self: 'store; + + fn sync(&self, descr: origin::Descr, limits: store::Limits) -> Result<(), store::SyncError> { + // attempt to lock this descr for syncing, doing so within a new scope so the mutex + // isn't actually being held for the whole method duration. + let is_already_syncing = { + self.sync_guard + .lock() + .unwrap() + .insert(descr.clone(), ()) + .is_some() + }; + + if is_already_syncing { + return Err(store::SyncError::AlreadyInProgress); + } + + let res = self.sync_inner(&descr, limits); + + self.sync_guard.lock().unwrap().remove(&descr); + + let repo = match res { + Ok(repo) => repo, + Err(e) => return Err(e), + }; + + // repo is synced at this point (though the sync lock is still held), just gotta create + // the origin and store it. + // + // TODO this is a bit of a memory leak, but by the time we get + // to that point this should all be backed by something which isn't local storage + // anyway. + + // calling this while the sync lock is held isn't ideal, but it's convenient and + // shouldn't be too terrible generally + let origin = self.get_origin(repo, descr.clone()).map_err(|e| match e { + GetOriginError::InvalidBranchName => store::SyncError::InvalidBranchName, + GetOriginError::Unexpected(e) => store::SyncError::Unexpected(e), + })?; + + let mut origins = self.origins.write().unwrap(); + (*origins).insert(descr, origin); + + Ok(()) + } + + fn get(&self, descr: origin::Descr) -> Result, store::GetError> { + { + let origins = self.origins.read().unwrap(); + if let Some(origin) = origins.get(&descr) { + return Ok(origin.clone()); + } + } + + let repo_path = self.repo_path(&descr); + + fs::read_dir(&repo_path).map_err(|e| match e.kind() { + io::ErrorKind::NotFound => store::GetError::NotFound, + _ => store::GetError::Unexpected(Box::from(e)), + })?; + + let repo = gix::open(&repo_path).map_err(|e| store::GetError::Unexpected(Box::from(e)))?; + + let origin = self.get_origin(repo, descr.clone()).map_err(|e| match e { + // it's not expected that the branch name is invalid at this point, it must have + // existed for sync to have been successful. + GetOriginError::InvalidBranchName => store::GetError::Unexpected(Box::from(e)), + GetOriginError::Unexpected(e) => store::GetError::Unexpected(e), + })?; + + let mut origins = self.origins.write().unwrap(); + (*origins).insert(descr, origin.clone()); + + Ok(origin) + } + + fn all_descrs(&self) -> store::AllDescrsResult> { + Ok(Box::from( + fs::read_dir(&self.dir_path) + .map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))? + .map( + |dir_entry_res: io::Result| -> store::AllDescrsResult { + let descr_id: String = dir_entry_res + .map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))? + .file_name() + .to_str() + .ok_or_else(|| { + store::AllDescrsError::Unexpected(Box::from( + "couldn't convert os string to &str", + )) + })? + .into(); + + let descr_file_path = self.descr_file_path(descr_id.as_ref()); + + // TODO it's possible that opening the file will fail if syncing is + // still ongoing, as writing the descr file is the last step after + // initial sync has succeeded. + let descr_file = fs::File::open(descr_file_path) + .map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))?; + + let descr = serde_json::from_reader(descr_file) + .map_err(|e| store::AllDescrsError::Unexpected(Box::from(e)))?; + + Ok(descr) + }, + ), + )) + } +} + +#[cfg(test)] +mod tests { + use crate::origin::store; + use crate::origin::store::Store; + use crate::origin::{self, Origin}; + use std::sync; + use tempdir::TempDir; + + #[test] + fn basic() { + let tmp_dir = TempDir::new("origin_store_git").unwrap(); + + let curr_dir = format!("file://{}", std::env::current_dir().unwrap().display()); + + let descr = origin::Descr::Git { + url: curr_dir.clone(), + branch_name: String::from("master"), + }; + + let other_descr = origin::Descr::Git { + url: curr_dir.clone(), + branch_name: String::from("some_other_branch"), + }; + + let limits = store::Limits {}; + + let store = super::Store::new(tmp_dir.path().to_path_buf()).expect("store created"); + + store + .sync(descr.clone(), limits) + .expect("sync should succeed"); + store + .sync(descr.clone(), limits) + .expect("second sync should succeed"); + + assert!(matches!( + store.get(other_descr), + Err::, store::GetError>(store::GetError::NotFound), + )); + + let origin = store.get(descr.clone()).expect("origin retrieved"); + + assert_eq!(&descr, origin.descr()); + + let assert_write = |path: &str| { + let mut into: Vec = vec![]; + origin + .read_file_into(path, &mut into) + .expect("write should succeed"); + assert!(into.len() > 0); + }; + + assert_write("src/lib.rs"); + assert_write("/src/lib.rs"); + + // File doesn't exist + let mut into: Vec = vec![]; + assert!(matches!( + origin.read_file_into("DNE", &mut into), + Err::<(), origin::ReadFileIntoError>(origin::ReadFileIntoError::FileNotFound), + )); + assert_eq!(into.len(), 0); + + let descrs = store + .all_descrs() + .expect("all_descrs callsed") + .collect::>>(); + + assert_eq!(1, descrs.len()); + assert_eq!(&descr, descrs[0].as_ref().unwrap()); + } +}