2023-06-14 18:22:10 +00:00
|
|
|
use crate::error::unexpected::{self, Intoable, Mappable};
|
2023-07-16 14:09:37 +00:00
|
|
|
use crate::{origin, util};
|
2024-02-16 20:22:39 +00:00
|
|
|
use std::{collections, sync};
|
2023-07-06 17:19:51 +00:00
|
|
|
|
2023-06-17 14:04:26 +00:00
|
|
|
/// Cached per-origin state recorded by a successful sync.
#[derive(Clone)]
struct DescrState {
    // Object id of the tree at the tip commit of the synced branch; file
    // lookups start their tree traversal from here.
    current_tree: gix_hash::ObjectId,
}
|
|
|
|
|
2024-02-16 20:22:39 +00:00
|
|
|
/// Error returned when a loose git object cannot be fetched from the remote
/// origin (see `Proxy::get_object`).
#[derive(thiserror::Error, Clone, Debug, PartialEq)]
enum GetObjectError {
    /// The object could not be retrieved or decoded; treated as an issue on
    /// the origin's side rather than the caller's.
    #[error("unavailable due to server-side issue")]
    Unavailable,

    /// Any other unanticipated error.
    #[error(transparent)]
    Unexpected(#[from] unexpected::Error),
}
|
|
|
|
|
2024-02-16 20:22:39 +00:00
|
|
|
/// An origin store which serves file contents by fetching refs and loose
/// objects directly from a remote git repository over HTTP, rather than
/// keeping a local clone. (See the `origin::Store` impl below.)
#[derive(Default)]
pub struct Proxy {
    // HTTP client used for all requests to remote origins.
    client: reqwest::Client,

    // Latest synced state for each origin descriptor; written by `sync`, read
    // by `get_file`.
    state: sync::RwLock<collections::HashMap<origin::Descr, DescrState>>,

    // to prevent syncing the same origin more than once at a time, but still
    // allow hitting that origin during a sync.
    sync_guard: sync::Mutex<collections::HashMap<origin::Descr, ()>>,
}
|
|
|
|
|
2024-02-16 20:22:39 +00:00
|
|
|
impl Proxy {
    /// Constructs a new Proxy with a default HTTP client and empty state.
    pub fn new() -> Proxy {
        Proxy::default()
    }

    /// Splits a descriptor into its git URL and branch name.
    fn deconstruct_descr(descr: &origin::Descr) -> (&origin::descr::GitUrl, &str) {
        // Irrefutable pattern: Git is the only Descr variant (as far as this
        // match is concerned).
        let origin::Descr::Git {
            ref url,
            ref branch_name,
        } = descr;
        (url, branch_name)
    }

    /// Returns `url` with `sub_path` appended to its path component.
    fn construct_url(
        url: &origin::descr::GitUrl,
        sub_path: &str,
    ) -> unexpected::Result<reqwest::Url> {
        // Round-trip through a string to convert the parsed GitUrl into a
        // reqwest::Url.
        let mut url: reqwest::Url = {
            url.parsed
                .to_string()
                .parse()
                .or_unexpected_while("parsing url as reqwest url")?
        };

        // NOTE(review): PathBuf::join uses the platform path separator, so
        // this presumes a Unix-like host for URL path building — TODO confirm.
        let new_path = url
            .path()
            .parse::<std::path::PathBuf>()
            .or_unexpected_while("parsing url path")?
            .join(sub_path);

        url.set_path(
            new_path
                .to_str()
                .or_unexpected_while("converting new path to string")?,
        );

        Ok(url)
    }

    /// Fetches the remote's advertised refs and returns the commit hash at
    /// the tip of the descriptor's branch.
    ///
    /// Errors with `InvalidURL` if the refs can't be fetched or parsed, and
    /// `InvalidBranchName` if no advertised ref matches the branch.
    async fn get_tip_commit_hash(
        &self,
        descr: &origin::Descr,
    ) -> Result<gix_hash::ObjectId, origin::SyncError> {
        let (url, branch_name) = Self::deconstruct_descr(descr);

        let refs_url =
            Self::construct_url(url, "info/refs").or_unexpected_while("constructing refs url")?;

        // when fetching refs we assume that any issue indicates that the origin itself
        // (and therefore the URL) has some kind of issue.
        let refs = self
            .client
            .get(refs_url)
            .send()
            .await
            .or(Err(origin::SyncError::InvalidURL))?
            .error_for_status()
            .or(Err(origin::SyncError::InvalidURL))?
            .text()
            .await
            .or(Err(origin::SyncError::InvalidURL))?;

        // Each line is expected to look like "<hex-hash>\t<ref-name>"; scan
        // for the line naming our branch and parse its leading hash field.
        let full_ref = format!("refs/heads/{}", branch_name);
        for line in refs.lines() {
            if !line.ends_with(full_ref.as_str()) {
                continue;
            }

            return gix_hash::ObjectId::from_hex(
                line.split_ascii_whitespace()
                    .next()
                    .ok_or(origin::SyncError::InvalidURL)?
                    .as_bytes(),
            )
            .or(Err(origin::SyncError::InvalidURL));
        }

        Err(origin::SyncError::InvalidBranchName)
    }

    /// Fetches a single loose object from the remote, verifies it is of the
    /// expected kind, and returns a stream of its contents (header stripped).
    ///
    /// All fetch/decode failures — including a kind mismatch — are collapsed
    /// into `GetObjectError::Unavailable`.
    async fn get_object(
        &self,
        descr: &origin::Descr,
        oid: &gix_hash::ObjectId,
        expect_kind: gix_object::Kind,
    ) -> Result<util::BoxByteStream, GetObjectError> {
        let hex = oid.to_string();
        let (url, _) = Self::deconstruct_descr(descr);

        // Loose objects live at "objects/<first 2 hex chars>/<rest>".
        let object_url =
            Self::construct_url(url, format!("objects/{}/{}", &hex[..2], &hex[2..]).as_str())
                .or_unexpected_while("constructing refs url")?;

        // Stream the response body through a zlib decoder; loose objects are
        // stored zlib-compressed.
        let mut loose_object = self
            .client
            .get(object_url)
            .send()
            .await
            .or(Err(GetObjectError::Unavailable))?
            .error_for_status()
            .map(|res| {
                use async_compression::tokio::bufread::ZlibDecoder;
                use futures::stream::TryStreamExt;
                use std::io;

                tokio::io::BufReader::new(ZlibDecoder::new(tokio_util::io::StreamReader::new(
                    res.bytes_stream()
                        .map_err(|e| io::Error::new(io::ErrorKind::Other, e)),
                )))
            })
            .or(Err(GetObjectError::Unavailable))?;

        // The decompressed object starts with a NUL-terminated header
        // ("<kind> <size>\0"); read it off so the returned stream contains
        // only the object payload.
        use tokio::io::AsyncBufReadExt;
        let mut header = Vec::<u8>::new();
        loose_object
            .read_until(0, &mut header)
            .await
            .or(Err(GetObjectError::Unavailable))?;

        let (kind, _, _) =
            gix_object::decode::loose_header(&header).or(Err(GetObjectError::Unavailable))?;

        if kind != expect_kind {
            return Err(GetObjectError::Unavailable);
        }

        Ok(util::BoxByteStream::from_async_read(loose_object))
    }

    /// Fetches the given commit object and returns the id of its root tree.
    async fn get_commit_tree(
        &self,
        descr: &origin::Descr,
        commit_hash: &gix_hash::ObjectId,
    ) -> Result<gix_hash::ObjectId, origin::SyncError> {
        let commit_object_bytes = self
            .get_object(descr, commit_hash, gix_object::Kind::Commit)
            .await
            .map_err(|e| match e {
                GetObjectError::Unavailable => origin::SyncError::Unavailable,
                GetObjectError::Unexpected(_) => e.into_unexpected().into(),
            })?
            .read_to_end()
            .await
            .or(Err(origin::SyncError::Unavailable))?;

        let commit_object = gix_object::CommitRef::from_bytes(commit_object_bytes.as_ref())
            .or(Err(origin::SyncError::Unavailable))?;

        Ok(commit_object.tree())
    }

    /// Fetches the given tree object and returns the entry named
    /// `entry_name`, or `FileNotFound` if the tree has no such entry.
    async fn get_tree_entry(
        &self,
        descr: &origin::Descr,
        tree_hash: &gix_hash::ObjectId,
        entry_name: &str,
    ) -> Result<gix_object::tree::Entry, origin::GetFileError> {
        let tree_object_bytes = self
            .get_object(descr, tree_hash, gix_object::Kind::Tree)
            .await
            .map_err(|e| match e {
                GetObjectError::Unavailable => origin::GetFileError::Unavailable,
                GetObjectError::Unexpected(_) => e.into_unexpected().into(),
            })?
            .read_to_end()
            .await
            .or(Err(origin::GetFileError::Unavailable))?;

        let tree_object = gix_object::TreeRef::from_bytes(tree_object_bytes.as_ref())
            .or(Err(origin::GetFileError::Unavailable))?;

        // Linear scan over the tree's entries for a filename match.
        for entry in tree_object.entries {
            if entry.filename == entry_name {
                return Ok(entry.into());
            }
        }

        Err(origin::GetFileError::FileNotFound)
    }
}
|
|
|
|
|
2024-02-16 20:22:39 +00:00
|
|
|
impl origin::Store for Proxy {
    /// Resolves the tip commit of the descriptor's branch and caches the
    /// commit's root tree id in `self.state`. Returns `AlreadyInProgress` if
    /// another sync of the same descriptor is currently running.
    fn sync(&self, descr: &origin::Descr) -> util::BoxFuture<'_, Result<(), origin::SyncError>> {
        let descr = descr.clone();
        Box::pin(async move {
            // attempt to lock this descr for syncing, doing so within a new scope so the mutex
            // isn't actually being held for the whole method duration.
            let is_already_syncing = {
                self.sync_guard
                    .lock()
                    .unwrap()
                    .insert(descr.clone(), ())
                    .is_some()
            };

            if is_already_syncing {
                return Err(origin::SyncError::AlreadyInProgress);
            }

            // perform the rest of the work within this closure, so we can be sure that the guard
            // lock is released no matter what.
            let res = async {
                let commit_hash = self.get_tip_commit_hash(&descr).await?;
                let current_tree = self.get_commit_tree(&descr, &commit_hash).await?;
                self.state
                    .write()
                    .unwrap()
                    .insert(descr.clone(), DescrState { current_tree });
                Ok(())
            }
            .await;

            // release the per-descr sync guard before propagating any error.
            self.sync_guard.lock().unwrap().remove(&descr);
            res
        })
    }

    /// Streams the contents of the blob at `path` within the descriptor's
    /// synced tree. Requires a prior successful `sync` of the descriptor
    /// (else `DescrNotSynced`). `path` is expected to be absolute; each
    /// intermediate component must be a directory (tree) and the final
    /// component a regular file (blob).
    fn get_file(
        &self,
        descr: &origin::Descr,
        path: &str,
    ) -> util::BoxFuture<'_, Result<util::BoxByteStream, origin::GetFileError>> {
        let descr = descr.clone();
        let path = path.to_string();
        Box::pin(async move {
            // Clone the cached state out so the read lock is released at the
            // end of this statement.
            let current_state = self
                .state
                .read()
                .unwrap()
                .get(&descr)
                .ok_or(origin::GetFileError::DescrNotSynced)?
                .clone();

            let path = path
                .as_str()
                .parse::<std::path::PathBuf>()
                .or_unexpected_while("parsing path")?;

            let path_parts = path.iter().collect::<Vec<&std::ffi::OsStr>>();

            let path_parts_len = path_parts.len();

            // An absolute path yields the separator itself as the first
            // component, so a valid path has at least [separator, file_name].
            if path_parts_len < 2 {
                return Err(unexpected::Error::from("path has fewer than 2 parts").into());
            } else if path_parts[0] != std::path::MAIN_SEPARATOR_STR {
                return Err(unexpected::Error::from(format!(
                    "expected first path part to be separator, found {:?}",
                    path_parts[0]
                ))
                .into());
            }

            let mut tree_hash = current_state.current_tree;

            // The first part is "/" (main separator), and the last is the file name itself.
            // Everything in between (if any) should be directories, so navigate those.
            for dir_name in path_parts[1..path_parts_len - 1].iter() {
                let entry = self
                    .get_tree_entry(
                        &descr,
                        &tree_hash,
                        dir_name
                            .to_str()
                            .map_unexpected_while(|| format!("decoding dir name {dir_name:?}"))?,
                    )
                    .await?;

                // A non-tree entry in a directory position means the path
                // can't resolve to a file.
                if !entry.mode.is_tree() {
                    return Err(origin::GetFileError::FileNotFound);
                }

                // Descend into the child tree.
                tree_hash = entry.oid;
            }

            let file_name = {
                let file_name = path_parts[path_parts_len - 1];
                file_name
                    .to_str()
                    .map_unexpected_while(|| format!("decoding file name {file_name:?}"))?
            };

            let entry = self.get_tree_entry(&descr, &tree_hash, file_name).await?;

            // TODO handle symlinks
            if entry.mode.is_tree() {
                return Err(origin::GetFileError::PathIsDirectory);
            } else if !entry.mode.is_blob() {
                return Err(unexpected::Error::from(format!(
                    "can't handle entry {} of mode {}",
                    entry.filename,
                    entry.mode.as_str()
                ))
                .into());
            }

            // Stream the blob's contents back to the caller.
            self.get_object(&descr, &entry.oid, gix_object::Kind::Blob)
                .await
                .map_err(|e| match e {
                    GetObjectError::Unavailable => origin::GetFileError::Unavailable,
                    GetObjectError::Unexpected(_) => e
                        .into_unexpected_while(format!("getting object for entry {:?}", entry))
                        .into(),
                })
        })
    }
}
|