Talk about K2V specifics

This commit is contained in:
Alex Auvolat 2023-01-13 13:51:39 +01:00
parent d44e8366e7
commit 065d6e1e06
No known key found for this signature in database
GPG Key ID: 0E496D15096376BE
2 changed files with 62 additions and 12 deletions

View File

@ -780,17 +780,73 @@
\begin{frame}
\frametitle{K2V Design}
\begin{itemize}
\item A new, custom, minimal API
\item A new, custom, minimal API\\
\vspace{.5em}
\begin{itemize}
\item Single-item operations
\item Operations on ranges and batches of items
\item Polling operations to help implement a PubSub pattern
\end{itemize}
\vspace{1em}
\item<2-> Exposes the partitioning mechanism of Garage\\
K2V = partition key / sort key / value (like Dynamo)
\vspace{1em}
\item<3-> Coordination-free, CRDT-friendly (inspired by Riak)\\
\item<3-> Weakly consistent, CRDT-friendly\\
$\to$ no support for transactions (not ACID)
\vspace{1em}
\item<4-> Cryptography-friendly: values are binary blobs
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Handling concurrent values}
\textbf{How to handle concurrency?} Example:
\vspace{1em}
\begin{enumerate}
\item Client $A$ reads the initial value of a key, $x_0$
\vspace{1em}
\item<2-> Client $B$ also reads the initial value $x_0$ of that key
\vspace{1em}
\item<3-> Client $A$ modifies $x_0$, and writes a new value $x_1$
\vspace{1em}
\item<4-> Client $B$ also modifies $x_0$, and writes a new value $x'_1$,\\
without having a chance to first read $x_1$\\
\vspace{1em}
$\to$ what should the final state be?
\end{enumerate}
\end{frame}
\begin{frame}
\frametitle{Handling concurrent values}
\begin{itemize}
\item If we keep only $x_1$ or $x'_1$, we risk \textbf{losing application data}
\vspace{1.5em}
\item Values are opaque binary blobs, \textbf{K2V cannot resolve conflicts} by itself\\
(e.g.\ by implementing a CRDT)
\vspace{1.5em}
\item Solution: \textbf{we keep both!}\\
$\to$ the value of the key is now $\{x_1, x'_1\}$\\
$\to$ the client application can decide how to resolve conflicts on the next read
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Keeping track of causality}
How does K2V know that $x_1$ and $x'_1$ are concurrent?
\vspace{1em}
\begin{itemize}
\item $read()$ returns \textbf{a set of values} and an associated \textbf{causality token}\\
\vspace{1.5em}
\item When calling $write()$, the client sends \textbf{the causality token from its last read}
\vspace{1.5em}
\item The causality token represents the set of values \textbf{already seen by the client}\\
$\to$ those values are the \textbf{causal past} of the write operation\\
$\to$ K2V can keep concurrent values and overwrite all those in the causal past
\vspace{1.5em}
\item Internally, the causality token is \textbf{a vector clock}
\end{itemize}
\end{frame}
\begin{frame}
\frametitle{Application: an e-mail storage server}
\begin{center}
@ -800,7 +856,7 @@
\begin{frame}
\frametitle{A new model for building resilient software}
\begin{itemize}
\begin{enumerate}
\item Design a data model suited to K2V\\
{\footnotesize (see Cassandra docs on porting SQL data models to Cassandra)}
\vspace{1em}
@ -810,22 +866,16 @@
\item Store opaque binary blobs to provide End-to-End Encryption\\
\end{itemize}
\vspace{1em}
\item Store big blobs (files) in S3
\item Store big blobs (files) using the S3 API
\vspace{1em}
\item Let Garage manage sharding, replication, failover, etc.
\end{itemize}
\end{enumerate}
\end{frame}
\begin{frame}
\frametitle{Research perspectives}
\begin{itemize}
\item Write about Garage's global architecture \emph{(paper in progress)}
\vspace{1em}
\item Measure and improve Garage's performance
\vspace{1em}
\item Discuss the optimal layout algorithm, provide proofs
\vspace{1em}
\item Write about our proposed architecture for (E2EE) apps over K2V+S3
\item TODO
\end{itemize}
\end{frame}