Remove old code related to static; it's not needed anymore

Brian Picciano 2022-05-21 14:07:14 -06:00
parent 55eb40d4bb
commit f3340ae5f4
139 changed files with 0 additions and 15895 deletions

View File

@ -1,5 +0,0 @@
#!/bin/bash
postsDir=static/src/_posts
echo "$postsDir/$(ls -1 "$postsDir" | sort -n | tail -n1)"

View File

@ -1,85 +0,0 @@
#!/bin/sh
set -e
numargs=2
usage() {
echo "Usage: $0 [options] <post title> <post description>
Options:
-i Create image directory
-d \"YYYY-MM-DD\" Custom date to use instead of today
-V Verbose
-x Dry run, don't make any changes
"
exit 1
}
td=$(date "+%Y-%m-%d")
while [ "$(echo $1 | head -c1)" = '-' -o "$#" -gt $numargs ]; do
arg="$1"
shift
case "$arg" in
"-i") IMG_DIR=1;;
"-d") td=$1; shift;;
"-V") VERBOSE=1;;
"-x") DRY_RUN=1;;
"--no-editor") NO_EDITOR=1;;
*)
echo "Unknown option '$arg'"
usage;;
esac
done
if [ "$#" != $numargs ]; then usage; fi
if [ ! -z $VERBOSE ]; then set -x; fi
title="$1"
clean_title=$(echo "$title" |\
tr '[:upper:]' '[:lower:]' |\
sed 's/[^a-z0-9 ]//g' |\
tr ' ' '-' \
)
description="$2"
if echo "$description" | grep -q '[^.$!?]$'; then
echo 'Description needs to be a complete sentence, with ending punctuation.'
exit 1
fi
postFileName=static/src/_posts/$td-$clean_title.md
echo "Creating $postFileName"
postContent=$(cat <<EOF
---
title: >-
$title
description: >-
$description
#tags: tech art crypto
---
Write stuff here, title will automatically be added as an h1
## Secondary header
Title is already h1 so all sub-titles should be h2 or below.
EOF
)
if [ -z $DRY_RUN ]; then
echo "$postContent" > "$postFileName"
fi
if [ ! -z $IMG_DIR ]; then
imgDirName="img/$clean_title"
echo "Creating directory $imgDirName"
if [ -z $DRY_RUN ]; then
mkdir -p "$imgDirName"
fi
fi
if [ -z $DRY_RUN ] && [ -z $NO_EDITOR ]; then
exec $EDITOR "$postFileName"
fi

static/.gitignore
View File

@ -1,5 +0,0 @@
.bundle
.sass-cache
*.gem
.jekyll-metadata
_site

View File

@ -1,12 +0,0 @@
source "https://rubygems.org"
gem "jekyll", "3.9.0"
gem "kramdown-parser-gfm", "1.1.0"
# specify nokogiri because 1.11.1 breaks nix for some reason
gem "nokogiri", "1.10.10"
gem "jekyll-feed", "0.15.1"
gem "jekyll-seo-tag", "2.7.1"
gem "jekyll-relative-links", "0.6.1"
gem "jekyll-sitemap", "1.4.0"

View File

@ -1,82 +0,0 @@
GEM
remote: https://rubygems.org/
specs:
addressable (2.8.0)
public_suffix (>= 2.0.2, < 5.0)
colorator (1.1.0)
concurrent-ruby (1.1.9)
em-websocket (0.5.2)
eventmachine (>= 0.12.9)
http_parser.rb (~> 0.6.0)
eventmachine (1.2.7)
ffi (1.15.3)
forwardable-extended (2.6.0)
http_parser.rb (0.6.0)
i18n (0.9.5)
concurrent-ruby (~> 1.0)
jekyll (3.9.0)
addressable (~> 2.4)
colorator (~> 1.0)
em-websocket (~> 0.5)
i18n (~> 0.7)
jekyll-sass-converter (~> 1.0)
jekyll-watch (~> 2.0)
kramdown (>= 1.17, < 3)
liquid (~> 4.0)
mercenary (~> 0.3.3)
pathutil (~> 0.9)
rouge (>= 1.7, < 4)
safe_yaml (~> 1.0)
jekyll-feed (0.15.1)
jekyll (>= 3.7, < 5.0)
jekyll-relative-links (0.6.1)
jekyll (>= 3.3, < 5.0)
jekyll-sass-converter (1.5.2)
sass (~> 3.4)
jekyll-seo-tag (2.7.1)
jekyll (>= 3.8, < 5.0)
jekyll-sitemap (1.4.0)
jekyll (>= 3.7, < 5.0)
jekyll-watch (2.2.1)
listen (~> 3.0)
kramdown (2.3.1)
rexml
kramdown-parser-gfm (1.1.0)
kramdown (~> 2.0)
liquid (4.0.3)
listen (3.6.0)
rb-fsevent (~> 0.10, >= 0.10.3)
rb-inotify (~> 0.9, >= 0.9.10)
mercenary (0.3.6)
mini_portile2 (2.4.0)
nokogiri (1.10.10)
mini_portile2 (~> 2.4.0)
pathutil (0.16.2)
forwardable-extended (~> 2.6)
public_suffix (4.0.6)
rb-fsevent (0.11.0)
rb-inotify (0.10.1)
ffi (~> 1.0)
rexml (3.2.5)
rouge (3.26.0)
safe_yaml (1.0.5)
sass (3.7.4)
sass-listen (~> 4.0.0)
sass-listen (4.0.0)
rb-fsevent (~> 0.9, >= 0.9.4)
rb-inotify (~> 0.9, >= 0.9.7)
PLATFORMS
ruby
DEPENDENCIES
jekyll (= 3.9.0)
jekyll-feed (= 0.15.1)
jekyll-relative-links (= 0.6.1)
jekyll-seo-tag (= 2.7.1)
jekyll-sitemap (= 1.4.0)
kramdown-parser-gfm (= 1.1.0)
nokogiri (= 1.10.10)
BUNDLED WITH
2.1.4

View File

@ -1,38 +0,0 @@
{pkgs}: rec {
depInputs = [ pkgs.imagemagick pkgs.exiftool pkgs.bundler pkgs.bundix ];
depShell = pkgs.stdenv.mkDerivation {
name = "mediocre-blog-static-dep-shell";
buildInputs = depInputs;
};
jekyllEnv = pkgs.bundlerEnv {
name = "jekyllEnv";
ruby = pkgs.ruby;
gemdir = ./.;
};
build = pkgs.stdenv.mkDerivation {
name = "mediocre-blog-static";
src = ./src;
buildPhase = "${jekyllEnv}/bin/jekyll build";
installPhase = "mv _site $out";
};
serve = pkgs.writeScriptBin "static-serve" ''
#!/bin/sh
exec ${jekyllEnv}/bin/jekyll serve \
-s ./src \
-d ./_site \
-w -I -D \
-P 4002
'';
allInputs = depInputs ++ [ jekyllEnv serve ];
shell = pkgs.stdenv.mkDerivation {
name = "mediocre-blog-static-shell";
buildInputs = allInputs;
};
}

View File

@ -1,340 +0,0 @@
{
addressable = {
dependencies = ["public_suffix"];
groups = ["default"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "022r3m9wdxljpbya69y2i3h9g3dhhfaqzidf95m6qjzms792jvgp";
type = "gem";
};
version = "2.8.0";
};
colorator = {
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "0f7wvpam948cglrciyqd798gdc6z3cfijciavd0dfixgaypmvy72";
type = "gem";
};
version = "1.1.0";
};
concurrent-ruby = {
groups = ["default"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "0nwad3211p7yv9sda31jmbyw6sdafzmdi2i2niaz6f0wk5nq9h0f";
type = "gem";
};
version = "1.1.9";
};
em-websocket = {
dependencies = ["eventmachine" "http_parser.rb"];
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "1mg1mx735a0k1l8y14ps2mxdwhi5r01ikydf34b0sp60v66nvbkb";
type = "gem";
};
version = "0.5.2";
};
eventmachine = {
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "0wh9aqb0skz80fhfn66lbpr4f86ya2z5rx6gm5xlfhd05bj1ch4r";
type = "gem";
};
version = "1.2.7";
};
ffi = {
groups = ["default"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "1wgvaclp4h9y8zkrgz8p2hqkrgr4j7kz0366mik0970w532cbmcq";
type = "gem";
};
version = "1.15.3";
};
forwardable-extended = {
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "15zcqfxfvsnprwm8agia85x64vjzr2w0xn9vxfnxzgcv8s699v0v";
type = "gem";
};
version = "2.6.0";
};
"http_parser.rb" = {
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "15nidriy0v5yqfjsgsra51wmknxci2n2grliz78sf9pga3n0l7gi";
type = "gem";
};
version = "0.6.0";
};
i18n = {
dependencies = ["concurrent-ruby"];
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "038qvz7kd3cfxk8bvagqhakx68pfbnmghpdkx7573wbf0maqp9a3";
type = "gem";
};
version = "0.9.5";
};
jekyll = {
dependencies = ["addressable" "colorator" "em-websocket" "i18n" "jekyll-sass-converter" "jekyll-watch" "kramdown" "liquid" "mercenary" "pathutil" "rouge" "safe_yaml"];
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "0ci1v1mpad36191vzbm1050dxccwv6ky4yhdyvskmqxa6cf6v21j";
type = "gem";
};
version = "3.9.0";
};
jekyll-feed = {
dependencies = ["jekyll"];
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "1zxqkrnix0xiw98531h5ga6h69jhzlx2jh9qhvcl67p8nq3sgza9";
type = "gem";
};
version = "0.15.1";
};
jekyll-relative-links = {
dependencies = ["jekyll"];
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "0vfx90ajxyj24lz406k3pqknlbzy8nqs7wpz0in4ps9rggsh24yi";
type = "gem";
};
version = "0.6.1";
};
jekyll-sass-converter = {
dependencies = ["sass"];
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "008ikh5fk0n6ri54mylcl8jn0mq8p2nfyfqif2q3pp0lwilkcxsk";
type = "gem";
};
version = "1.5.2";
};
jekyll-seo-tag = {
dependencies = ["jekyll"];
groups = ["default"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "0fsi75hymk2wswy216fs224p5ycrzjw1kshw1bsl5czhv42wr2w3";
type = "gem";
};
version = "2.7.1";
};
jekyll-sitemap = {
dependencies = ["jekyll"];
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "0622rwsn5i0m5xcyzdn86l68wgydqwji03lqixdfm1f1xdfqrq0d";
type = "gem";
};
version = "1.4.0";
};
jekyll-watch = {
dependencies = ["listen"];
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "1qd7hy1kl87fl7l0frw5qbn22x7ayfzlv9a5ca1m59g0ym1ysi5w";
type = "gem";
};
version = "2.2.1";
};
kramdown = {
dependencies = ["rexml"];
groups = ["default"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "0jdbcjv4v7sj888bv3vc6d1dg4ackkh7ywlmn9ln2g9alk7kisar";
type = "gem";
};
version = "2.3.1";
};
kramdown-parser-gfm = {
dependencies = ["kramdown"];
groups = ["default"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "0a8pb3v951f4x7h968rqfsa19c8arz21zw1vaj42jza22rap8fgv";
type = "gem";
};
version = "1.1.0";
};
liquid = {
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "0zhg5ha8zy8zw9qr3fl4wgk4r5940n4128xm2pn4shpbzdbsj5by";
type = "gem";
};
version = "4.0.3";
};
listen = {
dependencies = ["rb-fsevent" "rb-inotify"];
groups = ["default"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "1dq7yd4s9accpjiq0f92sgikw3whc5wnjn065laggkpqcqgx75gh";
type = "gem";
};
version = "3.6.0";
};
mercenary = {
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "10la0xw82dh5mqab8bl0dk21zld63cqxb1g16fk8cb39ylc4n21a";
type = "gem";
};
version = "0.3.6";
};
mini_portile2 = {
groups = ["default"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "15zplpfw3knqifj9bpf604rb3wc1vhq6363pd6lvhayng8wql5vy";
type = "gem";
};
version = "2.4.0";
};
nokogiri = {
dependencies = ["mini_portile2"];
groups = ["default"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "0xmf60nj5kg9vaj5bysy308687sgmkasgx06vbbnf94p52ih7si2";
type = "gem";
};
version = "1.10.10";
};
pathutil = {
dependencies = ["forwardable-extended"];
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "12fm93ljw9fbxmv2krki5k5wkvr7560qy8p4spvb9jiiaqv78fz4";
type = "gem";
};
version = "0.16.2";
};
public_suffix = {
groups = ["default"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "1xqcgkl7bwws1qrlnmxgh8g4g9m10vg60bhlw40fplninb3ng6d9";
type = "gem";
};
version = "4.0.6";
};
rb-fsevent = {
groups = ["default"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "1qsx9c4jr11vr3a9s5j83avczx9qn9rjaf32gxpc2v451hvbc0is";
type = "gem";
};
version = "0.11.0";
};
rb-inotify = {
dependencies = ["ffi"];
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "1jm76h8f8hji38z3ggf4bzi8vps6p7sagxn3ab57qc0xyga64005";
type = "gem";
};
version = "0.10.1";
};
rexml = {
groups = ["default"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "08ximcyfjy94pm1rhcx04ny1vx2sk0x4y185gzn86yfsbzwkng53";
type = "gem";
};
version = "3.2.5";
};
rouge = {
groups = ["default"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "0b4b300i3m4m4kw7w1n9wgxwy16zccnb7271miksyzd0wq5b9pm3";
type = "gem";
};
version = "3.26.0";
};
safe_yaml = {
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "0j7qv63p0vqcd838i2iy2f76c3dgwzkiz1d1xkg7n0pbnxj2vb56";
type = "gem";
};
version = "1.0.5";
};
sass = {
dependencies = ["sass-listen"];
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "0p95lhs0jza5l7hqci1isflxakz83xkj97lkvxl919is0lwhv2w0";
type = "gem";
};
version = "3.7.4";
};
sass-listen = {
dependencies = ["rb-fsevent" "rb-inotify"];
groups = ["default" "jekyll_plugins"];
platforms = [];
source = {
remotes = ["https://rubygems.org"];
sha256 = "0xw3q46cmahkgyldid5hwyiwacp590zj2vmswlll68ryvmvcp7df";
type = "gem";
};
version = "4.0.0";
};
}

View File

@ -1,39 +0,0 @@
#!/bin/sh
# requires imagemagick and perl-image-exiftool
set -e
widths="500 1000 1500 2000 2500 3000"
for img in "$@"; do
echo $img
# make target directories
dir=$(dirname "$img") # gets directory
for targetWidth in $widths; do
mkdir -p $dir/${targetWidth}px
done
# get width
width=$(identify "$img" | awk '{print $3}' | cut -dx -f1)
echo -e "\toriginal width: $width"
echo -e "\tremoving metadata"
exiftool -all= "$img"
rm -f "${img}_original" # exiftool makes a copy of the original, delete it
for targetWidth in $widths; do
targetFile=$dir/${targetWidth}px/$(basename "$img")
echo -en "\tresizing into $targetFile... "
if [ "$targetWidth" -ge "$width" ]; then
echo "skipping, original image too small"
continue
elif [ -e "$targetFile" ]; then
echo "skipping, target file exists"
continue
fi
convert "$img" -resize $targetWidth "$targetFile"
echo "done"
done
done

View File

@ -1,8 +0,0 @@
---
layout: default
---
<div style="text-align: center; margin-top:5rem;">
<h1>404</h1>
<p><strong>Page not found :(</strong></p>
</div>

View File

@ -1,34 +0,0 @@
title: Mediocre Blog
author: Brian Picciano
email: mediocregopher@gmail.com
description: >-
A mix of tech, art, travel, and who knows what else.
url: https://blog.mediocregopher.com
hub_url: https://mediocregopher.eth.link
rss: rss
highlighter: rouge
defaults:
- scope:
path: "_posts"
values:
layout: "post"
plugins:
- jekyll-feed
- jekyll-seo-tag
- jekyll-relative-links
- jekyll-sitemap
date_format: "%b %-d, %Y"
img_widths:
- 500
- 1000
- 1500
- 2000
- 2500
- 3000
feed:
tags: true

View File

@ -1,7 +0,0 @@
<footer>
<p class="license light">
Unless otherwise specified, all works are licensed under the
<a href="{{ '/assets/wtfpl.txt' | relative_url}}">WTFPL</a>.
</p>
</footer>

View File

@ -1,12 +0,0 @@
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
{%- seo -%}
<link rel="stylesheet" href="{{ "/assets/normalize.css" | relative_url }}">
<link rel="stylesheet" href="{{ "/assets/skeleton.css" | relative_url }}">
<link rel="stylesheet" href="{{ "/assets/friendly.css" | relative_url }}">
<link rel="stylesheet" href="{{ "/assets/main.css" | relative_url }}">
<link rel="stylesheet" href="{{ "/assets/fontawesome/css/all.css" | relative_url }}">
{%- feed_meta -%}
</head>

View File

@ -1,29 +0,0 @@
<header id="title-header" role="banner">
<div class="row">
<div class="seven columns" style="margin-bottom: 3rem;">
<h1 class="title">
<a href="{{ "/" | relative_url }}">{{ site.title | escape }}</a>
</h1>
<div class="light social">
<span>By {{ site.author | escape }}</span>
<span>
Even more @
<a href="{{ site.hub_url }}" target="_blank">{{ site.hub_url }}</a>
</span>
</div>
</div>
{%- if page.nofollow != true %}
<div class="five columns light">
<span style="display:block; margin-bottom:0.5rem;">Get notified when new posts are published!</span>
<a href="{{ "/follow.html" | relative_url }}"><button class="button-primary">
<i class="far fa-envelope"></i>
Follow
</button></a>
<a href="{{ "/feed.xml" | relative_url }}"><button class="button">
<i class="fas fa-rss"></i>
RSS
</button></a>
</div>
{% endif -%}
</div>
</header>

View File

@ -1,43 +0,0 @@
<div style="
box-sizing: border-box;
text-align: center;
padding-left: 2em;
padding-right: 2em;
margin-bottom: 1em;
{%- if include.float %}
float: {{ include.float }};
{% endif -%}
{%- if include.float or include.inline %}
max-width: 49%;
{% endif -%}
{%- if include.inline %}
display: inline-block;
{% endif -%}
">
<a href="/img/{{ include.dir }}/{{ include.file }}" target="_blank">
<picture>
{%- if include.width %}
{%- for targetWidth in site.img_widths reversed -%}
{% if include.width <= targetWidth %}{% continue %}{% endif %}
{%- if targetWidth > 1000 %}
<source media="(min-width: 1000px) and (min-resolution: {{ targetWidth | divided_by: 1000.0 }}dppx)"
{%- elsif targetWidth > 500 %}
<source media="(min-width: 500px), (min-resolution: 1.1dppx)"
{%- else %}
<source
{% endif %}
srcset="/img/{{ include.dir }}/{{ targetWidth }}px/{{ include.file }}"
>
{%- endfor %}
{%- endif %}
<img style="max-height: 60vh;"
{% if include.width < 1000 %}
src="/img/{{ include.dir }}/{{ include.file }}"
{% else %}
src="/img/{{ include.dir }}/1000px/{{ include.file }}"
{% endif %}
alt="{{ include.descr }}" />
</picture>
</a>
{%- if include.descr %}<br/><em>{{ include.descr }}</em>{%- endif %}
</div>

View File

@ -1,10 +0,0 @@
---
layout: default
---
{% capture body %}```{{ page.lang | default: "go" }}
{% include_relative {{ page.include }} %}```{% endcapture %}
<br/><a href="{{ page.include }}">Raw source file</a>
{{ body | markdownify }}

View File

@ -1,22 +0,0 @@
<!DOCTYPE html>
<html lang="{{ page.lang | default: site.lang | default: "en" }}">
{%- include head.html -%}
<body>
<div class="container">
{%- include header.html -%}
<main aria-label="Content">
{{ content }}
</main>
{%- include footer.html -%}
</div>
</body>
</html>

View File

@ -1,13 +0,0 @@
---
layout: default
---
<header id="post-header">
<h1 id="post-headline" itemprop="name headline">
{{ page.title | escape }}
</h1>
</header>
<div id="post-content">
{{ content }}
</div>

View File

@ -1,80 +0,0 @@
---
layout: default
---
<article itemscope itemtype="http://schema.org/BlogPosting">
<header id="post-header">
<h1 id="post-headline" itemprop="name headline">
{{ page.title | escape }}
</h1>
<div class="light">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<span itemprop="name">{{ site.author }}</span>
</span>
<!---->
<time datetime="{{ page.date | date_to_xmlschema }}" itemprop="datePublished">
{{ page.date | date: site.date_format }}
</time>
&nbsp;&nbsp;
{%- if page.updated %}
<time datetime="{{ page.updated | date_to_xmlschema }}" itemprop="dateModified">
(Updated {{ page.updated | date: site.date_format }})
</time>
&nbsp;&nbsp;
{% endif -%}
<description itemprop="about"><em>{{ page.description }}</em></description>
</div>
</header>
{% if page.series %}
{% assign foundThis = false %}
{% for post in site.posts reversed %}
{% if post.series == page.series %}
{% if post.url == page.url %}
{% assign foundThis = true %}
{% elsif foundThis %}
{% assign next = post %}
{% break %}
{% else %}
{% assign prev = post %}
{% endif %}
{% endif %}
{% endfor %}
{% if prev or next %}
<p class="light"><em>
This post is part of a series:<br/>
{% if prev %}
Previously: <a href="{{ prev.url | relative_url }}">{{ prev.title }}</a><br/>
{% endif %}
{% if next %}
Next: <a href="{{ next.url | relative_url }}">{{ next.title }}</a><br/>
{% endif %}
</em></p>
{% endif %}
{% endif %}
<div id="post-content" itemprop="articleBody">
{{ content }}
</div>
{% if page.git_repo %}
<p class="light">
<em>To check this project out locally:</em><br/>
<pre><code>git clone {{ page.git_repo }}
{% if page.git_commit %}git checkout {{ page.git_commit }}{% endif %}</code></pre>
</p>
{% endif %}
{% if prev or next %}
<p class="light"><em>
If you liked this post, consider checking out other posts in the series:<br/>
{% if prev %}
Previously: <a href="{{ prev.url | relative_url }}">{{ prev.title }}</a><br/>
{% endif %}
{% if next %}
Next: <a href="{{ next.url | relative_url }}">{{ next.title }}</a><br/>
{% endif %}
</em></p>
{% endif %}
</article>

View File

@ -1,257 +0,0 @@
---
title: "Erlang, tcp sockets, and active true"
description: >-
Using `{active,once}` isn't always the best way to handle connections.
tags: tech
---
If you don't know erlang then [you're missing out][0]. If you do know erlang,
you've probably at some point done something with tcp sockets. Erlang's highly
concurrent model of execution lends itself well to server programs where a high
number of active connections is desired. Each thread can autonomously handle its
single client, greatly simplifying the logic of the whole application while
still retaining [great performance characteristics][1].
## Background
For an erlang thread which owns a single socket there are three different ways
to receive data off of that socket. These all revolve around the `active`
[setopts][2] flag. A socket can be set to one of:
* `{active,false}` - All data must be obtained through [recv/2][3] calls. This
  amounts to synchronous socket reading.
* `{active,true}` - All data on the socket gets sent to the controlling thread
  as a normal erlang message. It is the thread's responsibility to keep up with
  the buffered data in the message queue. This amounts to asynchronous socket
  reading.
* `{active,once}` - When set, the socket is placed in `{active,true}` for a
  single packet. That is, once set, the thread can expect a single message to
  be sent to it when data comes in. To receive any more data off of the socket,
  the socket must either be read from using [recv/2][3] or be put back in
  `{active,once}` or `{active,true}`.
## Which to use?
Many (most?) tutorials advocate using `{active,once}` in your application
\[0]\[1]\[2]. This has to do with usability and security. When in `{active,true}`
it's possible for a client to flood the connection faster than the receiving
process will process those messages, potentially eating up a lot of memory in
the VM. However, if you want to be able to receive both tcp data messages as
well as other messages from other erlang processes at the same time you can't
use `{active,false}`. So `{active,once}` is generally preferred because it
deals with both of these problems quite well.
## Why not to use `{active,once}`
Here's what your classic `{active,once}` enabled tcp socket implementation will
probably look like:
```erlang
-module(tcp_test).
-compile(export_all).
-define(TCP_OPTS, [
binary,
{packet, raw},
{nodelay,true},
{active, false},
{reuseaddr, true},
{keepalive,true},
{backlog,500}
]).
%Start listening
listen(Port) ->
{ok, L} = gen_tcp:listen(Port, ?TCP_OPTS),
?MODULE:accept(L).
%Accept a connection
accept(L) ->
{ok, Socket} = gen_tcp:accept(L),
?MODULE:read_loop(Socket),
io:fwrite("Done reading, connection was closed\n"),
?MODULE:accept(L).
%Read everything it sends us
read_loop(Socket) ->
inet:setopts(Socket, [{active, once}]),
receive
{tcp, _, _} ->
do_stuff_here,
?MODULE:read_loop(Socket);
{tcp_closed, _}-> donezo;
{tcp_error, _, _} -> donezo
end.
```
This code isn't actually usable for a production system; it doesn't even spawn a
new process for the new socket. But that's not the point I'm making. If I run it
with `tcp_test:listen(8000)`, and in other window do:
```bash
while [ 1 ]; do echo "aloha"; done | nc localhost 8000
```
We'll be flooding the server with data pretty well. Using [eprof][4] we can
get an idea of how our code performs, and where the hang-ups are:
```erlang
1> eprof:start().
{ok,<0.34.0>}
2> P = spawn(tcp_test,listen,[8000]).
<0.36.0>
3> eprof:start_profiling([P]).
profiling
4> running_the_while_loop.
running_the_while_loop
5> eprof:stop_profiling().
profiling_stopped
6> eprof:analyze(procs,[{sort,time}]).
****** Process <0.36.0> -- 100.00 % of profiled time ***
FUNCTION CALLS % TIME [uS / CALLS]
-------- ----- --- ---- [----------]
prim_inet:type_value_2/2 6 0.00 0 [ 0.00]
....snip....
prim_inet:enc_opts/2 6 0.00 8 [ 1.33]
prim_inet:setopts/2 12303599 1.85 1466319 [ 0.12]
tcp_test:read_loop/1 12303598 2.22 1761775 [ 0.14]
prim_inet:encode_opt_val/1 12303599 3.50 2769285 [ 0.23]
prim_inet:ctl_cmd/3 12303600 4.29 3399333 [ 0.28]
prim_inet:enc_opt_val/2 24607203 5.28 4184818 [ 0.17]
inet:setopts/2 12303598 5.72 4533863 [ 0.37]
erlang:port_control/3 12303600 77.13 61085040 [ 4.96]
```
eprof shows us where our process is spending the majority of its time. The `%`
column indicates percentage of time the process spent during profiling inside
any function. We can pretty clearly see that the vast majority of time was spent
inside `erlang:port_control/3`, the BIF that `inet:setopts/2` uses to switch the
socket to `{active,once}` mode. Amongst the calls which were called on every
loop, it takes up by far the most amount of time. In addition all of those other
calls are also related to `inet:setopts/2`.
I'm gonna rewrite our little listen server to use `{active,true}`, and we'll do
it all again:
```erlang
-module(tcp_test).
-compile(export_all).
-define(TCP_OPTS, [
binary,
{packet, raw},
{nodelay,true},
{active, false},
{reuseaddr, true},
{keepalive,true},
{backlog,500}
]).
%Start listening
listen(Port) ->
{ok, L} = gen_tcp:listen(Port, ?TCP_OPTS),
?MODULE:accept(L).
%Accept a connection
accept(L) ->
{ok, Socket} = gen_tcp:accept(L),
inet:setopts(Socket, [{active, true}]), %Well this is new
?MODULE:read_loop(Socket),
io:fwrite("Done reading, connection was closed\n"),
?MODULE:accept(L).
%Read everything it sends us
read_loop(Socket) ->
%inet:setopts(Socket, [{active, once}]),
receive
{tcp, _, _} ->
do_stuff_here,
?MODULE:read_loop(Socket);
{tcp_closed, _}-> donezo;
{tcp_error, _, _} -> donezo
end.
```
And the profiling results:
```erlang
1> eprof:start().
{ok,<0.34.0>}
2> P = spawn(tcp_test,listen,[8000]).
<0.36.0>
3> eprof:start_profiling([P]).
profiling
4> running_the_while_loop.
running_the_while_loop
5> eprof:stop_profiling().
profiling_stopped
6> eprof:analyze(procs,[{sort,time}]).
****** Process <0.36.0> -- 100.00 % of profiled time ***
FUNCTION CALLS % TIME [uS / CALLS]
-------- ----- --- ---- [----------]
prim_inet:enc_value_1/3 7 0.00 1 [ 0.14]
prim_inet:decode_opt_val/1 1 0.00 1 [ 1.00]
inet:setopts/2 1 0.00 2 [ 2.00]
prim_inet:setopts/2 2 0.00 2 [ 1.00]
prim_inet:enum_name/2 1 0.00 2 [ 2.00]
erlang:port_set_data/2 1 0.00 2 [ 2.00]
inet_db:register_socket/2 1 0.00 3 [ 3.00]
prim_inet:type_value_1/3 7 0.00 3 [ 0.43]
.... snip ....
prim_inet:type_opt_1/1 19 0.00 7 [ 0.37]
prim_inet:enc_value/3 7 0.00 7 [ 1.00]
prim_inet:enum_val/2 6 0.00 7 [ 1.17]
prim_inet:dec_opt_val/1 7 0.00 7 [ 1.00]
prim_inet:dec_value/2 6 0.00 10 [ 1.67]
prim_inet:enc_opt/1 13 0.00 12 [ 0.92]
prim_inet:type_opt/2 19 0.00 33 [ 1.74]
erlang:port_control/3 3 0.00 59 [ 19.67]
tcp_test:read_loop/1 20716370 100.00 12187488 [ 0.59]
```
This time our process spent almost no time at all (according to eprof, 0%)
fiddling with the socket opts. Instead it spent all of its time in the
read_loop doing the work we actually want to be doing.
## So what does this mean?
I'm by no means advocating never using `{active,once}`. The security concern is
still a completely valid concern and one that `{active,once}` mitigates quite
well. I'm simply pointing out that this mitigation has some fairly serious
performance implications which have the potential to bite you if you're not
careful, especially in cases where a socket is going to be receiving a large
amount of traffic.
## Meta
These tests were done using R15B03, but I've done similar ones in R14 and found
similar results. I have not tested R16.
* \[0] http://learnyousomeerlang.com/buckets-of-sockets
* \[1] http://www.erlang.org/doc/man/gen_tcp.html#examples
* \[2] http://erlycoder.com/25/erlang-tcp-server-tcp-client-sockets-with-gen_tcp
[0]: http://learnyousomeerlang.com/content
[1]: http://www.metabrew.com/article/a-million-user-comet-application-with-mochiweb-part-1
[2]: http://www.erlang.org/doc/man/inet.html#setopts-2
[3]: http://www.erlang.org/doc/man/gen_tcp.html#recv-2
[4]: http://www.erlang.org/doc/man/eprof.html

View File

@ -1,78 +0,0 @@
---
title: Go+
description: >-
A simple proof-of-concept script for doing go dependency management.
tags: tech
---
Compared to other languages go has some strange behavior regarding its project
root settings. If you import a library called `somelib`, go will look for a
`src/somelib` folder in all of the folders in the `$GOPATH` environment
variable. This works nicely for globally installed packages, but it makes
encapsulating a project with a specific version, or modified version, rather
tedious. Whenever you go to work on this project you'll have to add its path to
your `$GOPATH`, or add the path permanently, which could break other projects
which may use a different version of `somelib`.
My solution is in the form of a simple script I'm calling go+. go+ will search
in the current directory and all of its parents for a file called `GOPROJROOT`.
If it finds that file in a directory, it prepends that directory's absolute path
to your `$GOPATH` and stops the search. Regardless of whether or not
`GOPROJROOT` was found, go+ will pass through all arguments to the actual go call. The
modification to `$GOPATH` will only last the duration of the call.
As an example, consider the following:
```
/tmp
/hello
GOPROJROOT
/src
/somelib/somelib.go
/hello.go
```
If `hello.go` depends on `somelib`, as long as you run go+ from `/tmp/hello` or
one of its children your project will still compile.
Here is the source code for go+:
```bash
#!/bin/sh
SEARCHING_FOR=GOPROJROOT
ORIG_DIR=$(pwd)
STOPSEARCH=0
SEARCH_DIR=$ORIG_DIR
while [ $STOPSEARCH = 0 ]; do
RES=$( find $SEARCH_DIR -maxdepth 1 -type f -name $SEARCHING_FOR | \
grep -P "$SEARCHING_FOR$" | \
head -n1 )
if [ "$RES" = "" ]; then
if [ "$SEARCH_DIR" = "/" ]; then
STOPSEARCH=1
fi
cd ..
SEARCH_DIR=$(pwd)
else
export GOPATH=$SEARCH_DIR:$GOPATH
STOPSEARCH=1
fi
done
cd "$ORIG_DIR"
exec go "$@"
```
## UPDATE: Goat
I'm leaving this post for posterity, but go+ has some serious flaws in it. For
one, it doesn't allow for specifying the version of a dependency you want to
use. To this end, I wrote [goat][0] which does all the things go+ does, plus
real dependency management, PLUS it is built in a way that if you've been
following go's best-practices for code organization you shouldn't have to change
any of your existing code AT ALL. It's cool, check it out.
[0]: http://github.com/mediocregopher/goat

View File

@ -1,101 +0,0 @@
---
title: Generations
description: >-
A simple file distribution strategy for very large scale, high-availability
file-services.
tags: tech
---
## The problem
At [cryptic.io][cryptic] we plan on having millions of different
files, any of which could be arbitrarily chosen to be served at any given time.
These files are uploaded by users at arbitrary times.
Scaling such a system is no easy task. The solution I've seen implemented in the
past involves shuffling files around on a nearly constant basis, making sure
that files which are more "popular" are on fast drives, while at the same time
making sure that no drives are at capacity and that all files, even newly
uploaded ones, are stored redundantly.
The problem with this solution is one of coordination. At any given moment the
app needs to be able to "find" a file so it can give the client a link to
download the file from one of the servers that it's on. Fulfilling this simple
requirement means that all datastores/caches where information about where a
file lives need to be up-to-date at all times, and even then there are
race-conditions and network failures to contend with, while at all times the
requirements of the app evolve and change.
## A simpler solution
Let's say you want all files which get uploaded to be replicated in triplicate
in some capacity. You buy three identical hard-disks, and put each on a separate
server. As files get uploaded by clients, each file gets put on each drive
immediately. When the drives are filled (which should be at around the same
time), you stop uploading to them.
That was generation 0.
You buy three more drives, and start putting all files on them instead. This is
going to be generation 1. Repeat until you run out of money.
That's it.
### That's it?
It seems simple and obvious, and maybe it's the standard thing which is done,
but as far as I can tell no-one has written about it (though I'm probably not
searching for the right thing, let me know if this is the case!).
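To give a sense of how little bookkeeping the scheme needs, here's a rough Go
sketch of the naming and lookup side. The hostnames and helper names are made
up for illustration, this isn't code from any real implementation:
```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// generations holds, for each generation, the hosts whose drives carry a full
// copy of that generation's files. This is the only "where is it?" state the
// scheme needs (hostnames are placeholders).
var generations = [][]string{
	{"fs0a.example.com", "fs0b.example.com", "fs0c.example.com"}, // gen 0 (full)
	{"fs1a.example.com", "fs1b.example.com", "fs1c.example.com"}, // gen 1 (active)
}

// fileName embeds the generation into the server-generated name, so lookup
// never needs a database round-trip.
func fileName(gen int, id string) string {
	return fmt.Sprintf("gen%d-%s", gen, id)
}

// hostsFor parses the generation back out of a file name and returns every
// host which should have a copy of it.
func hostsFor(name string) ([]string, error) {
	genStr := strings.TrimPrefix(strings.SplitN(name, "-", 2)[0], "gen")
	gen, err := strconv.Atoi(genStr)
	if err != nil || gen < 0 || gen >= len(generations) {
		return nil, fmt.Errorf("bad generation in file name %q", name)
	}
	return generations[gen], nil
}

func main() {
	name := fileName(1, "a8f3c2") // e.g. "gen1-a8f3c2"
	hosts, _ := hostsFor(name)    // every host in generation 1
	fmt.Println(name, "is served by", hosts)
}
```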
### Advantages
* It's so simple to implement, you could probably do it in a day if you're
starting a project from scratch
* By definition of the scheme all files are replicated in multiple places.
* Minimal information about where a file "is" needs to be stored. When a file is
uploaded all that's needed is to know what generation it is in, and then what
nodes/drives are in that generation. If the file's name is generated
server-side, then the file's generation could be *part* of its name, making
lookup even faster.
* Drives don't need to "know" about each other. What I mean by this is that
whatever is running as the receive point for file-uploads on each drive doesn't
have to coordinate with its siblings running on the other drives in the
generation. In fact it doesn't need to coordinate with anyone. You could
literally rsync files onto your drives if you wanted to. I would recommend using
[marlin][0] though :)
* Scaling is easy. When you run out of space you can simply start a new
generation. If you don't like playing that close to the chest there's nothing to
say you can't have two generations active at the same time.
* Upgrading is easy. As long as a generation is not marked-for-upload, you can
easily copy all files in the generation into a new set of bigger, badder drives,
add those drives into the generation in your code, remove the old ones, then
mark the generation as uploadable again.
* Distribution is easy. You just copy a generation's files onto a new drive in
Europe or wherever you're getting an uptick in traffic from and you're good to
go.
* Management is easy. It's trivial to find out how many times a file has been
replicated, or how many countries it's in, or what hardware it's being served
from (given you have easy access to information about specific drives).
### Caveats
The big caveat here is that this is just an idea. It has NOT been tested in
production. But we have enough faith in it that we're going to give it a shot at
[cryptic.io][cryptic]. I'll keep this page updated.
The second caveat is that this scheme does not inherently support caching. If a
file suddenly becomes super popular the world over your hard-disks might not be
able to keep up, and it's probably not feasible to have an FIO drive in *every*
generation. I think that [groupcache][1] may be the answer to this problem,
assuming your files are reasonably small, but again I haven't tested it yet.
[cryptic]: https://cryptic.io
[0]: https://github.com/cryptic-io/marlin
[1]: https://github.com/golang/groupcache

View File

@ -1,249 +0,0 @@
---
title: Namecoin, A Replacement For SSL
description: >-
If we use the namecoin chain as a DNS service we get security almost for
free, along with lots of other benefits.
tags: tech crypto
---
At [cryptic.io][cryptic] we are creating a client-side, in-browser encryption
system where a user can upload their already encrypted content to our storage
system and be 100% confident that their data can never be decrypted by anyone
but them.
One of the main problems with this approach is that the client has to be sure
that the code that's being run in their browser is the correct code; that is,
that they aren't the subject of a man-in-the-middle attack where an attacker is
turning our strong encryption into weak encryption that they could later break.
A component of our current solution is to deliver the site's javascript (and all
other assets, for that matter) using SSL encryption. This protects the files
from tampering in-between leaving our servers and being received by the client.
Unfortunately, SSL isn't 100% foolproof. This post aims to show why SSL is
faulty, and propose a solution.
## SSL
SSL is the mechanism by which web-browsers establish an encrypted connection to
web-servers. The goal of this connection is that only the destination
web-browser and the server know what data is passing between them. Anyone spying
on the connection would only see gibberish. To do this a secret key is first
established between the client and the server, and used to encrypt/decrypt all
data. As long as no-one but those parties knows that key, that data will never
be decrypted by anyone else.
SSL is what's used to establish that secret key on a per-session basis, so that
a key isn't ever re-used and so only the client and the server know it.
### Public-Private Key Cryptography
SSL is based around public-private key cryptography. In a public-private key
system you have two keys: a private key, and a public key which is generated
from it. The public key can be given to anyone, but the private key must remain
hidden. There are two main uses for these two keys:
* Someone can encrypt a message with your public key, and only you (with the
private key) can decrypt it.
* You can sign a message with your private key, and anyone with your public key
can verify that it was you and not someone else who signed it.
These are both extremely useful functions, not just for internet traffic but for
any kind of communication form. Unfortunately, there remains a fundamental flaw.
At some point you must give your public key to the other person in an insecure
way. If an attacker was to intercept your message containing your public key and
swap it for their own, then all future communications could be compromised. That
attacker could create messages the other person would think are from you, and
the other person would encrypt messages meant for you but which would be
decrypt-able by the attacker.
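Before getting into how SSL tries to patch that flaw, here's what the signing
use-case looks like in a minimal Go sketch using the standard `crypto/ed25519`
package (purely illustrative; SSL itself uses different key types and formats):
```go
package main

import (
	"crypto/ed25519"
	"crypto/rand"
	"fmt"
)

func main() {
	// The private key stays hidden; the public key can be handed out freely.
	pub, priv, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		panic(err)
	}

	msg := []byte("a message everyone should be able to attribute to me")

	// Sign with the private key...
	sig := ed25519.Sign(priv, msg)

	// ...and anyone holding the public key can check the signature.
	fmt.Println("valid signature:", ed25519.Verify(pub, msg, sig))

	// A tampered message no longer verifies.
	fmt.Println("tampered message:", ed25519.Verify(pub, []byte("something else"), sig))
}
```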
### How does SSL work?
SSL is at its heart a public-private key system, but its aim is to be more
secure against the attack described above.
SSL uses a trust-chain to verify that a public key is the intended one. Your web
browser has a built-in set of public keys, called the root certificates, that it
implicitly trusts. These root certificates are managed by a small number of
companies designated by some agency who decides on these things.
When you receive a server's SSL certificate (its public key) that certificate
will be signed by a root certificate. You can verify that signature since you
have the root certificate's public key built into your browser. If the signature
checks out then you know a certificate authority trusts the public key the site
gave you, which means you can trust it too.
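That signature check is roughly what any TLS library's x509 machinery does
under the hood. As a sketch, here's the shape of it with Go's standard library
(error handling trimmed down, inputs assumed to be PEM bytes):
```go
package main

import (
	"crypto/x509"
	"encoding/pem"
	"fmt"
)

// verifyAgainstRoots checks that siteCertPEM chains up to one of the trusted
// root certificates in rootsPEM, for the given hostname.
func verifyAgainstRoots(siteCertPEM, rootsPEM []byte, hostname string) error {
	roots := x509.NewCertPool()
	if !roots.AppendCertsFromPEM(rootsPEM) {
		return fmt.Errorf("no usable root certificates")
	}

	block, _ := pem.Decode(siteCertPEM)
	if block == nil {
		return fmt.Errorf("no PEM data in site certificate")
	}
	cert, err := x509.ParseCertificate(block.Bytes)
	if err != nil {
		return err
	}

	// Verify walks the chain: the site cert must be signed (directly or via
	// intermediates) by something in the root pool the client already trusts.
	_, err = cert.Verify(x509.VerifyOptions{
		Roots:   roots,
		DNSName: hostname,
	})
	return err
}

func main() {
	// With real PEM bytes for a site cert and a root bundle this prints nil
	// or a descriptive verification error.
	fmt.Println(verifyAgainstRoots(nil, nil, "example.com"))
}
```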
There's a bit (a lot!) more to SSL than this, but this is enough to understand
the fundamental problems with it.
### How SSL doesn't work
SSL has a few glaring problems. One, it implies we trust the companies holding
the root certificates to not be compromised. If some malicious agency was to get
ahold of a root certificate they could listen in on any connection on the
internet by swapping a site's real certificate with one they generate on the
fly. They could trivially steal any data we send on the internet.
The second problem is that it's expensive. Really expensive. If you're running a
business you'll have to shell out about $200 a year to keep your SSL certificate
signed (those signatures have an expiration date attached). Since there's very
few root authorities there's an effective monopoly on signatures, and there's
nothing we can do about it. For 200 bucks I know most people simply say "no
thanks" and go unencrypted. The solution is creating a bigger problem.
## Bitcoins
Time to switch gears, and propose a solution to the above issues: namecoins. I'm
going to first talk about what namecoins are, how they work, and why we need
them. To start with, namecoins are based on bitcoins.
If you haven't yet checked out bitcoins, [I highly encourage you to do
so][bitcoins]. They're awesome, and I think they have a chance of really
changing the way we think of and use money in the future. At the moment they're
still a bit of a novelty in the tech realm, but they're growing in popularity.
The rest of this post assumes you know more or less what bitcoins are, and how
they work.
## Namecoins
Few people actually know about bitcoins. Even fewer know that there's other
crypto-currencies besides bitcoins. Basically, developers of these alternative
currencies (altcoins, in the parlance of our times) took the original bitcoin
source code and modified it to produce a new, separate blockchain from the
original bitcoin one. The altcoins are based on the same idea as bitcoins
(namely, a chain of blocks representing all the transactions ever made), but
have slightly different characteristics.
One of these altcoins is called namecoin. Where other altcoins aim to be digital
currencies, and used as such (like bitcoins), namecoin has a different goal. The
point of namecoin is to create a global, distributed, secure key-value store.
You spend namecoins to claim arbitrary keys (once you've claimed it, you own it
for a set period of time) and to give those keys arbitrary values. Anyone else
with namecoind running can see these values.
### Why use it?
A blockchain based on a digital currency seems like a weird idea at first. I
know when I first read about it I was less than thrilled. How is this better
than a DHT? It's a key-value store, why is there a currency involved?
#### DHT
DHT stands for Distributed Hash-Table. I'm not going to go too deep into how
they work, but suffice it to say that they are essentially a distributed key-value
store. Like namecoin. The difference is in the operation. DHTs operate by
spreading and replicating keys and their values across nodes in a P2P mesh. They
have [lots of issues][dht] as far as security goes, the main one being that it's
fairly easy for an attacker to forge the value for a given key, and very
difficult to stop them from doing so or even to detect that it's happened.
Namecoins don't have this problem. To forge a particular key an attacker would
essentially have to create a new blockchain from a certain point in the existing
chain, and then replicate all the work put into the existing chain into that new
compromised one so that the new one is longer and other clients in the network
will accept it. This is extremely non-trivial.
#### Why a currency?
To answer why a currency needs to be involved, we need to first look at how
bitcoin/namecoin work. When you take an action (send someone money, set a value
to a key) that action gets broadcast to the network. Nodes on the network
collect these actions into a block, which is just a collection of multiple
actions. Their goal is to find a hash of this new block, combined with some data
from the top-most block in the existing chain, combined with some arbitrary
data, such that the first n characters in the resulting hash are zeros (with n
constantly increasing). When they find one they broadcast it out on the network.
Assuming the block is legitimate they receive some number of coins as
compensation.
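If the leading-zeros search sounds abstract, here's a toy Go sketch of it
(hugely simplified: real mining hashes a structured block header and measures
difficulty in bits rather than hex characters):
```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"strings"
)

// mine searches for arbitrary data (a nonce) which, hashed together with the
// block contents and the previous block's hash, yields a hash starting with
// n zero characters.
func mine(prevHash, block string, n int) (nonce int, hash string) {
	target := strings.Repeat("0", n)
	for {
		sum := sha256.Sum256([]byte(fmt.Sprintf("%s|%s|%d", prevHash, block, nonce)))
		hash = hex.EncodeToString(sum[:])
		if strings.HasPrefix(hash, target) {
			return nonce, hash
		}
		nonce++
	}
}

func main() {
	nonce, hash := mine("00abc123", "alice pays bob 5", 4)
	// In the real system, finding a hash like this is what earns the miner
	// the block reward.
	fmt.Printf("found nonce %d -> %s\n", nonce, hash)
}
```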
That compensation is what keeps a blockchain based currency going. If there
were no compensation there would be no reason to mine except out of goodwill, so
far fewer people would do it. Since the chain can be compromised if a malicious
group has more computing power than all legitimate miners combined, having few
legitimate miners is a serious problem.
In the case of namecoins, there's even more reason to involve a currency. Since
you have to spend money to make changes to the chain there's a disincentive for
attackers (read: idiots) to spam the chain with frivolous changes to keys.
#### Why a *new* currency?
I'll admit, it's a bit annoying to see all these altcoins popping up. I'm sure
many of them have some solid ideas backing them, but it also makes things
confusing for newcomers and dilutes the "market" of cryptocoin users; the more
users a particular chain has, the stronger it is. If we have many chains, all we
have are a bunch of weak chains.
The exception to this gripe, for me, is namecoin. When I was first thinking
about this problem my instinct was to just use the existing bitcoin blockchain
as a key-value storage. However, the maintainers of the bitcoin clients
(who are, in effect, the maintainers of the chain) don't want the bitcoin
blockchain polluted with non-commerce related data. At first I disagreed; it's a
P2P network, no-one gets to say what I can or can't use the chain for! And
that's true. But things work out better for everyone involved if there's two
chains.
Bitcoin is a currency. Namecoin is a key-value store (with a currency as its
driving force). Those are two completely different use-cases, with two
completely different usage characteristics. And we don't know yet what those
characteristics are, or if they'll change. If the chain-maintainers have to deal
with a mingled chain we could very well be tying their hands with regard to
what they can or can't change about the behavior of the chain, since
improving performance for one use-case may hurt the performance of the other.
With two separate chains the maintainers of each are free to do what they see
fit to keep their respective chains operating as smoothly as possible.
Additionally, if for some reason bitcoins fall by the wayside, namecoin will
still have a shot at continuing operation since it isn't tied to the former.
Tldr: separation of concerns.
## Namecoin as an alternative to SSL
And now to tie it all together.
There are already a number of proposed formats for standardizing how we store
data on the namecoin chain so that we can start building tools around it. I'm
not hugely concerned with the particulars of those standards, only that we can,
in some way, standardize on attaching a public key (or a fingerprint of one) to
some key on the namecoin blockchain. When you visit a website, the server
would then send the browser both its public key and the namecoin chain key to
check it against, and the browser would validate that the public key it
received is the same as the one on the namecoin chain.
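In rough Go terms the browser-side check might look like the sketch below. The
`lookupNamecoinFingerprint` function is hypothetical, standing in for however
the chain ends up being queried:
```go
package main

import (
	"crypto/sha256"
	"crypto/tls"
	"encoding/hex"
	"fmt"
)

// lookupNamecoinFingerprint is a hypothetical stand-in for querying the
// namecoin chain (directly, via an ISP-run node, or via a public namecoind);
// it would return the fingerprint published under the site's chain key.
func lookupNamecoinFingerprint(chainKey string) (string, error) {
	return "", fmt.Errorf("not implemented: query the chain for %q", chainKey)
}

// checkSite connects to a host, fingerprints the public key it actually
// presented, and compares it against what the namecoin chain says it should be.
func checkSite(host, chainKey string) error {
	conn, err := tls.Dial("tcp", host+":443", &tls.Config{
		InsecureSkipVerify: true, // we're replacing the CA check, not skipping our own
	})
	if err != nil {
		return err
	}
	defer conn.Close()

	cert := conn.ConnectionState().PeerCertificates[0]
	sum := sha256.Sum256(cert.RawSubjectPublicKeyInfo)
	got := hex.EncodeToString(sum[:])

	want, err := lookupNamecoinFingerprint(chainKey)
	if err != nil {
		return err
	}
	if got != want {
		return fmt.Errorf("public key mismatch for %s: chain says %s, server sent %s", host, want, got)
	}
	return nil
}

func main() {
	if err := checkSite("example.com", "d/example"); err != nil {
		fmt.Println("verification failed:", err)
	}
}
```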
The main issue with this is that it requires another round-trip when visiting a
website: One for DNS, and one to check the namecoin chain. And where would this
chain even be hosted?
My proposition is that there would exist a number of publicly available servers
hosting a namecoind process, to which anyone in the world could send requests
for values on the chain. Browsers could then be made with a couple of these
hardwired in. ISPs could also run their own copies at various points in their
network to improve response-rates and decrease load on the globally public
servers. Furthermore, the paranoid could host their own and be absolutely sure
that the data they're receiving is valid.
If the above scheme sounds a lot like what we currently use for DNS, that's
because it is. In fact, one of namecoin's major goals is that it be used as a
replacement for DNS, and most of the talk around it is focused on this subject.
DNS has many of the same problems as SSL, namely single-point-of-failure and
that it's run by a centralized agency that we have to pay arbitrarily high fees
to. By switching our DNS and SSL infrastructure to use namecoin we could kill
two horribly annoying, monopolized, expensive birds with a single stone.
That's it. If we use the namecoin chain as a DNS service we get security almost
for free, along with lots of other benefits. To make this happen we need
cooperation from browser makers, and to standardize on a simple way of
retrieving DNS information from the chain that the browsers can use. The
protocol doesn't need to be very complex, I think HTTP/REST should suffice,
since the meat of the data will be embedded in the JSON value on the namecoin
chain.
If you want to contribute or learn more please check out [namecoin][nmc] and
specifically the [d namespace proposal][dns] for it.
[cryptic]: http://cryptic.io
[bitcoins]: http://vimeo.com/63502573
[dht]: http://www.globule.org/publi/SDST_acmcs2009.pdf
[nsa]: https://www.schneier.com/blog/archives/2013/09/new_nsa_leak_sh.html
[nmc]: http://dot-bit.org/Main_Page
[dns]: http://dot-bit.org/Namespace:Domain_names_v2.0

View File

@ -1,495 +0,0 @@
---
title: Diamond Square
description: >-
Tackling the problem of semi-realistic looking terrain generation in
clojure.
updated: 2018-09-06
tags: tech art
---
![terrain][terrain]
I recently started looking into the diamond-square algorithm (you can find a
great article on it [here][diamondsquare]). The following is a short-ish
walkthrough of how I tackled the problem in clojure and the results. You can
find the [leiningen][lein] repo [here][repo] and follow along within that, or
simply read the code below to get an idea.
Also, Marco ported my code into clojurescript, so you can get random terrain
in your browser. [Check it out!][marco]
```clojure
(ns diamond-square.core)
; == The Goal ==
; Create a fractal terrain generator using clojure
; == The Algorithm ==
; Diamond-Square. We start with a grid of points, each with a height of 0.
;
; 1. Take each corner point of the square, average the heights, and assign that
; to be the height of the midpoint of the square. Apply some random error to
; the midpoint.
;
; 2. Creating a line from the midpoint to each corner we get four half-diamonds.
; Average the heights of the points (with some random error) and assign the
; heights to the midpoints of the diamonds.
;
; 3. We now have four square sections, start at 1 for each of them (with
; decreasing amount of error for each iteration).
;
; This picture explains it better than I can:
; https://blog.mediocregopher.com/img/diamond-square/dsalg.png
; (http://nbickford.wordpress.com/2012/12/21/creating-fake-landscapes/dsalg/)
;
; == The Strategy ==
; We begin with a vector of vectors of numbers, and iterate over it, filling in
; spots as they become available. Our grid will have the top-left being (0,0),
; y being pointing down and x going to the right. The outermost vector
; indicating row number (y) and the inner vectors indicate the column number (x)
;
; = Utility =
; First we create some utility functions for dealing with vectors of vectors.
(defn print-m
"Prints a grid in a nice way"
[m]
(doseq [n m]
(println n)))
(defn get-m
"Gets a value at the given x,y coordinate of the grid, with [0,0] being in the
top left"
[m x y]
((m y) x))
(defn set-m
"Sets a value at the given x,y coordinat of the grid, with [0,0] being in the
top left"
[m x y v]
(assoc m y
(assoc (m y) x v)))
(defn add-m
"Like set-m, but adds the given value to the current on instead of overwriting
it"
[m x y v]
(set-m m x y
(+ (get-m m x y) v)))
(defn avg
"Returns the truncated average of all the given arguments"
[& l]
(int (/ (reduce + l) (count l))))
; = Grid size =
; Since we're starting with a blank grid we need to find out what sizes the
; grids can be. For convenience the size (height and width) should be odd, so we
; easily get a midpoint. And on each iteration we'll be halving the grid, so
; whenever we do that the two resultant grids should be odd and halvable as
; well, and so on.
;
; The algorithm that fits this is size = 2^n + 1, where 1 <= n. For the rest of
; this guide I'll be referring to n as the "degree" of the grid.
(def exp2-pre-compute
(vec (map #(int (Math/pow 2 %)) (range 31))))
(defn exp2
"Returns 2^n as an integer. Uses pre-computed values since we end up doing
this so much"
[n]
(exp2-pre-compute n))
(def grid-sizes
(vec (map #(inc (exp2 %)) (range 1 31))))
(defn grid-size [degree]
(inc (exp2 degree)))
; Available grid heights/widths are as follows:
;[3 5 9 17 33 65 129 257 513 1025 2049 4097 8193 16385 32769 65537 131073
;262145 524289 1048577 2097153 4194305 8388609 16777217 33554433 67108865
;134217729 268435457 536870913 1073741825])
(defn blank-grid
"Generates a grid of the given degree, filled in with zeros"
[degree]
(let [gsize (grid-size degree)]
(vec (repeat gsize
(vec (repeat gsize 0))))))
(comment
(print-m (blank-grid 3))
)
; = Coordinate Pattern (The Tricky Part) =
; We now have to figure out which coordinates need to be filled in on each pass.
; A pass is defined as a square step followed by a diamond step. The next pass
; will be the square/diamond steps on all the smaller squares generated in the
; pass. It works out that the number of passes required to fill in the grid is
; the same as the degree of the grid, where the first pass is 1.
;
; So we can easily find patterns in the coordinates for a given degree/pass,
; I've laid out below all the coordinates for each pass for a 3rd degree grid
; (which is 9x9).
; Degree 3 Pass 1 Square
; [. . . . . . . . .]
; [. . . . . . . . .]
; [. . . . . . . . .]
; [. . . . . . . . .]
; [. . . . 1 . . . .] (4,4)
; [. . . . . . . . .]
; [. . . . . . . . .]
; [. . . . . . . . .]
; [. . . . . . . . .]
; Degree 3 Pass 1 Diamond
; [. . . . 2 . . . .] (4,0)
; [. . . . . . . . .]
; [. . . . . . . . .]
; [. . . . . . . . .]
; [2 . . . . . . . 2] (0,4) (8,4)
; [. . . . . . . . .]
; [. . . . . . . . .]
; [. . . . . . . . .]
; [. . . . 2 . . . .] (4,8)
; Degree 3 Pass 2 Square
; [. . . . . . . . .]
; [. . . . . . . . .]
; [. . 3 . . . 3 . .] (2,2) (6,2)
; [. . . . . . . . .]
; [. . . . . . . . .]
; [. . . . . . . . .]
; [. . 3 . . . 3 . .] (2,6) (6,6)
; [. . . . . . . . .]
; [. . . . . . . . .]
; Degree 3 Pass 2 Diamond
; [. . 4 . . . 4 . .] (2,0) (6,0)
; [. . . . . . . . .]
; [4 . . . 4 . . . 4] (0,2) (4,2) (8,2)
; [. . . . . . . . .]
; [. . 4 . . . 4 . .] (2,4) (6,4)
; [. . . . . . . . .]
; [4 . . . 4 . . . 4] (0,6) (4,6) (8,6)
; [. . . . . . . . .]
; [. . 4 . . . 4 . .] (2,8) (6,8)
; Degree 3 Pass 3 Square
; [. . . . . . . . .]
; [. 5 . 5 . 5 . 5 .] (1,1) (3,1) (5,1) (7,1)
; [. . . . . . . . .]
; [. 5 . 5 . 5 . 5 .] (1,3) (3,3) (5,3) (7,3)
; [. . . . . . . . .]
; [. 5 . 5 . 5 . 5 .] (1,5) (3,5) (5,5) (7,5)
; [. . . . . . . . .]
; [. 5 . 5 . 5 . 5 .] (1,7) (3,7) (5,7) (7,7)
; [. . . . . . . . .]
; Degree 3 Pass 3 Diamond
; [. 6 . 6 . 6 . 6 .] (1,0) (3,0) (5,0) (7,0)
; [6 . 6 . 6 . 6 . 6] (0,1) (2,1) (4,1) (6,1) (8,1)
; [. 6 . 6 . 6 . 6 .] (1,2) (3,2) (5,2) (7,2)
; [6 . 6 . 6 . 6 . 6] (0,3) (2,3) (4,3) (6,3) (8,3)
; [. 6 . 6 . 6 . 6 .] (1,4) (3,4) (5,4) (7,4)
; [6 . 6 . 6 . 6 . 6] (0,5) (2,5) (4,5) (6,5) (8,5)
; [. 6 . 6 . 6 . 6 .] (1,6) (3,6) (5,6) (7,6)
; [6 . 6 . 6 . 6 . 6] (0,7) (2,7) (4,7) (6,7) (8,7)
; [. 6 . 6 . 6 . 6 .] (1,8) (3,8) (5,8) (7,8)
;
; I make two different functions, one to give the coordinates for the square
; portion of each pass and one for the diamond portion of each pass. To find the
; actual patterns it was useful to first look only at the pattern in the
; y-coordinates, and figure out how that translated into the pattern for the
; x-coordinates.
(defn grid-square-coords
"Given a grid degree and pass number, returns all the coordinates which need
to be computed for the square step of that pass"
[degree pass]
(let [gsize (grid-size degree)
start (exp2 (- degree pass))
interval (* 2 start)
coords (map #(+ start (* interval %))
(range (exp2 (dec pass))))]
(mapcat (fn [y]
(map #(vector % y) coords))
coords)))
;
; (grid-square-coords 3 2)
; => ([2 2] [6 2] [2 6] [6 6])
(defn grid-diamond-coords
"Given a grid degree and a pass number, returns all the coordinates which need
to be computed for the diamond step of that pass"
[degree pass]
(let [gsize (grid-size degree)
interval (exp2 (- degree pass))
num-coords (grid-size pass)
coords (map #(* interval %) (range 0 num-coords))]
(mapcat (fn [y]
(if (even? (/ y interval))
(map #(vector % y) (take-nth 2 (drop 1 coords)))
(map #(vector % y) (take-nth 2 coords))))
coords)))
; (grid-diamond-coords 3 2)
; => ([2 0] [6 0] [0 2] [4 2] [8 2] [2 4] [6 4] [0 6] [4 6] [8 6] [2 8] [6 8])
; = Height Generation =
; We now work on functions which, given a coordinate, will return what value
; that coordinate will have.
(defn avg-points
"Given a grid and an arbitrary number of points (of the form [x y]) returns
the average of all the given points that are on the map. Any points which are
off the map are ignored"
[m & coords]
(let [grid-size (count m)]
(apply avg
(map #(apply get-m m %)
(filter
(fn [[x y]]
(and (< -1 x) (> grid-size x)
(< -1 y) (> grid-size y)))
coords)))))
(defn error
"Returns a number between -e and e, inclusive"
[e]
(- (rand-int (inc (* 2 e))) e))
; The next function is a little weird. It primarily takes in a point, then
; figures out the distance from that point to the points we'll take the average
; of. The locf (locator function) is used to return back the actual points to
; use. For the square portion it'll be the points diagonal from the given one,
; for the diamond portion it'll be the points to the top/bottom/left/right from
; the given one.
;
; Once it has those points, it finds the average and applies the error. The
; error function is nothing more than a number between -interval and +interval,
; where interval is the distance between the given point and one of the averaged
; points. It is important that the error decreases the more passes you do, which
; is why the interval is used.
;
; The error function is what should be messed with primarily if you want to
; change what kind of terrain you generate (a giant mountain instead of
; hills/valleys, for example). The one we use is uniform for all intervals, so
; it generates a uniform terrain.
(defn- grid-fill-point
[locf m degree pass x y]
(let [interval (exp2 (- degree pass))
leftx (- x interval)
rightx (+ x interval)
upy (- y interval)
downy (+ y interval)
v (apply avg-points m
(locf x y leftx rightx upy downy))]
(add-m m x y (+ v (error interval)))))
(def grid-fill-point-square
"Given a grid, the grid's degree, the current pass number, and a point on the
grid, fills in that point with the average (plus some error) of the
appropriate corner points, and returns the resultant grid"
(partial grid-fill-point
(fn [_ _ leftx rightx upy downy]
[[leftx upy]
[rightx upy]
[leftx downy]
[rightx downy]])))
(def grid-fill-point-diamond
"Given a grid, the grid's degree, the current pass number, and a point on the
grid, fills in that point with the average (plus some error) of the
appropriate edge points, and returns the resultant grid"
(partial grid-fill-point
(fn [x y leftx rightx upy downy]
[[leftx y]
[rightx y]
[x upy]
[x downy]])))
; = Filling in the Grid =
; We finally compose the functions we've been creating to fill in the entire
; grid
(defn- grid-fill-point-passes
"Given a grid, a function to fill in coordinates, and a function to generate
those coordinates, fills in all coordinates for a given pass, returning the
resultant grid"
[m fill-f coord-f degree pass]
(reduce
(fn [macc [x y]] (fill-f macc degree pass x y))
m
(coord-f degree pass)))
(defn grid-pass
"Given a grid and a pass number, does the square then the diamond portion of
the pass"
[m degree pass]
(-> m
(grid-fill-point-passes
grid-fill-point-square grid-square-coords degree pass)
(grid-fill-point-passes
grid-fill-point-diamond grid-diamond-coords degree pass)))
; The most important function in this guide, does all the work
(defn terrain
"Given a grid degree, generates a uniformly random terrain on a grid of that
degree"
([degree]
(terrain (blank-grid degree) degree))
([m degree]
(reduce
#(grid-pass %1 degree %2)
m
(range 1 (inc degree)))))
(comment
(print-m
(terrain 5))
)
; == The Results ==
; We now have a generated terrain, probably. We should check it. First we'll
; create an ASCII representation. But to do that we'll need some utility
; functions.
(defn max-terrain-height
"Returns the maximum height found in the given terrain grid"
[m]
(reduce max
(map #(reduce max %) m)))
(defn min-terrain-height
"Returns the minimum height found in the given terrain grid"
[m]
(reduce min
(map #(reduce min %) m)))
(defn norm
"Given x in the range (A,B), normalizes it into the range (0,new-height)"
[A B new-height x]
(int (/ (* (- x A) new-height) (- B A))))
(defn normalize-terrain
"Given a terrain map and a number of \"steps\", normalizes the terrain so all
heights in it are in the range (0,steps)"
[m steps]
(let [max-height (max-terrain-height m)
min-height (min-terrain-height m)
norm-f (partial norm min-height max-height steps)]
(vec (map #(vec (map norm-f %)) m))))
; We now define which ASCII characters we want to use for which heights. The
; vector starts with the character for the lowest height and ends with the
; character for the highest height.
(def tiles
[\~ \~ \" \" \x \x \X \$ \% \# \@])
(defn tile-terrain
"Given a terrain map, converts it into an ASCII tile map"
[m]
(vec (map #(vec (map tiles %))
(normalize-terrain m (dec (count tiles))))))
(comment
(print-m
(tile-terrain
(terrain 5)))
; [~ ~ " " x x x X % $ $ $ X X X X X X $ x x x X X X x x x x " " " ~]
; [" ~ " " x x X X $ $ $ X X X X X X X X X X X X X X x x x x " " " "]
; [" " " x x x X X % $ % $ % $ $ X X X X $ $ $ X X X X x x x x " " "]
; [" " " x x X $ % % % % % $ % $ $ X X $ $ $ $ X X x x x x x x " " x]
; [" x x x x X $ $ # % % % % % % $ X $ X X % $ % X X x x x x x x x x]
; [x x x X $ $ $ % % % % % $ % $ $ $ % % $ $ $ $ X X x x x x x x x x]
; [X X X $ % $ % % # % % $ $ % % % % $ % $ $ X $ X $ X X x x x X x x]
; [$ $ X $ $ % $ % % % % $ $ $ % # % % % X X X $ $ $ X X X x x x x x]
; [% X X % % $ % % % $ % $ % % % # @ % $ $ X $ X X $ X x X X x x x x]
; [$ $ % % $ $ % % $ $ X $ $ % % % % $ $ X $ $ X X X X X X x x x x x]
; [% % % X $ $ % $ $ X X $ $ $ $ % % $ $ X X X $ X X X x x X x x X X]
; [$ $ $ X $ $ X $ X X X $ $ $ $ % $ $ $ $ $ X $ X x X X X X X x X X]
; [$ $ $ $ X X $ X X X X X $ % % % % % $ X $ $ $ X x X X X $ X X $ $]
; [X $ $ $ $ $ X X X X X X X % $ % $ $ $ X X X X X x x X X x X X $ $]
; [$ $ X X $ X X x X $ $ X X $ % X X X X X X X X X x X X x x X X X X]
; [$ $ X X X X X X X $ $ $ $ $ X $ X X X X X X X x x x x x x x X X X]
; [% % % $ $ X $ X % X X X % $ $ X X X X X X x x x x x x x x x X X $]
; [$ % % $ $ $ X X $ $ $ $ $ $ X X X X x X x x x x " x x x " x x x x]
; [$ X % $ $ $ $ $ X X X X X $ $ X X X X X X x x " " " " " " " " x x]
; [$ X $ $ % % $ X X X $ X X X x x X X x x x x x " " " " " ~ " " " "]
; [$ $ X X % $ % X X X X X X X X x x X X X x x x " " " " " " ~ " " "]
; [$ $ X $ % $ $ X X X X X X x x x x x x x x x " " " " " " " " " ~ ~]
; [$ $ $ $ $ X X $ X X X X X x x x x x x x x " " " " " " " ~ " " " ~]
; [$ % X X $ $ $ $ X X X X x x x x x x x x x x " " " " ~ " " ~ " " ~]
; [% $ $ X $ X $ X $ X $ X x x x x x x x x x x " " " " ~ ~ ~ " ~ " ~]
; [$ X X X X $ $ $ $ $ X x x x x x x x x x x " " " " ~ ~ ~ ~ ~ ~ ~ ~]
; [X x X X x X X X X X X X X x x x x x x x x x " " " ~ ~ " " ~ ~ ~ ~]
; [x x x x x x X x X X x X X X x x x x x x x " x " " " " " ~ ~ ~ ~ ~]
; [x x x x x x x x X X X X $ X X x X x x x x x x x x " ~ ~ ~ ~ ~ ~ ~]
; [" x x x x x X x X X X X X X X X X x x x x x x " " " " ~ ~ ~ ~ ~ ~]
; [" " " x x x X X X X $ $ $ X X X X X X x x x x x x x x " " ~ ~ ~ ~]
; [" " " " x x x X X X X X $ $ X X x X X x x x x x x x " " " " " ~ ~]
; [~ " " x x x x X $ X $ X $ $ X x X x x x x x x x x x x x x " " " ~]
)
; = Pictures! =
; ASCII is cool, but pictures are better. First we import some java libraries
; that we'll need, then define the colors for each level just like we did tiles
; for the ascii representation.
(import
'java.awt.image.BufferedImage
'javax.imageio.ImageIO
'java.io.File)
(def colors
[0x1437AD 0x04859D 0x007D1C 0x007D1C 0x24913C
0x00C12B 0x38E05D 0xA3A3A4 0x757575 0xFFFFFF])
; Finally we reduce over a BufferedImage instance to output every tile as a
; single pixel on it.
(defn img-terrain
"Given a terrain map and a file name, outputs a png representation of the
terrain map to that file"
[m file]
(let [img (BufferedImage. (count m) (count m) BufferedImage/TYPE_INT_RGB)]
(reduce
(fn [rown row]
(reduce
(fn [coln tile]
(.setRGB img coln rown (colors tile))
(inc coln))
0 row)
(inc rown))
0 (normalize-terrain m (dec (count colors))))
(ImageIO/write img "png" (File. file))))
(comment
(img-terrain
(terrain 10)
"resources/terrain.png")
; https://blog.mediocregopher.com/img/diamond-square/terrain.png
)
; == Conclusion ==
; There's still a lot of work to be done. The algorithm starts taking a
; non-trivial amount of time around the 10th degree, which is only a 1025x1025px
; image. I need to profile the code and find out where the bottlenecks are. It's
; possible re-organizing the code to use pmaps instead of reduces in some places
; could help.
```
[marco]: http://marcopolo.io/diamond-square/
[terrain]: /img/diamond-square/terrain.png
[diamondsquare]: http://www.gameprogrammer.com/fractal.html
[lein]: https://github.com/technomancy/leiningen
[repo]: https://github.com/mediocregopher/diamond-square

View File

@ -1,193 +0,0 @@
---
title: Erlang Pitfalls
description: >-
Common pitfalls that people may run into when designing and writing
large-scale erlang applications.
tags: tech
---
I've been involved with a large-ish scale erlang project at Grooveshark since
sometime around 2011. I started this project knowing absolutely nothing about
erlang, but now I feel I have accumulated enough knowledge over time that I could
conceivably give some back. Specifically, common pitfalls that people may run
into when designing and writing a large-scale erlang application. Some of these
may show up when searching for them, but some of them you may not even know you
need to search for.
## now() vs timestamp()
The canonical way of getting the current timestamp in erlang is to use
`erlang:now()`. This works great at small loads, but if you find your
application slowing down greatly at highly parallel loads and you're calling
`erlang:now()` a lot, it may be the culprit.
A property of this method you may not realize is that its output is strictly
increasing, meaning even if two processes call it at the *exact* same time they
will both receive different output. This is done through some low-level locking,
as well as a bit of math to balance out the time if it gets out of sync in that
scenario.
There are situations where fetching always unique timestamps is useful, such as
seeding RNGs and generating unique identifiers for things, but usually when
people fetch a timestamp they just want a timestamp. For these cases,
`os:timestamp()` can be used. It is not blocked by any locks, it simply returns
the time.
## The rpc module is slow
The built-in `rpc` module is slower than you'd think. This mostly stems from it
doing a lot of extra work for every `call` and `cast` that you do, ensuring that
certain conditions are accounted for. If, however, it's sufficient for the
calling side to know only that a call timed out, without worrying about it any
further, you may benefit from simply writing your own rpc module. Alternatively,
use [one which already exists](https://github.com/cloudant/rexi).
## Don't send anonymous functions between nodes
One of erlang's niceties is transparent message sending between two physical
erlang nodes. Once nodes are connected, a process on one can send any message to
a process on the other exactly as if they existed on the same node. This is fine
for many data-types, but for anonymous functions it should be avoided.
For example:
```erlang
RemotePid ! {fn, fun(I) -> I + 1 end}.
```
Would be better written as
```erlang
incr(I) ->
I + 1.
RemotePid ! {fn, ?MODULE, incr}.
```
and then using an `apply` on the RemotePid to actually execute the function.
This is because hot-swapping code messes with anonymous functions quite a bit.
Erlang isn't actually sending a function definition across the wire; it's simply
sending a reference to a function. If you've changed the code within the
anonymous function on a node, that reference changes. The sending node is
sending a reference to a function which may not exist anymore on the receiving
node, and you'll get a weird error which Google doesn't return many results for.
Alternatively, if you simply send atoms across the wire and use `apply` on the
other side, only atoms are sent and the two nodes involved can have totally
different ideas of what the function itself does without any problems.
## Hot-swapping code is a convenience, not a crutch
Hot swapping code is the bee's knees. It lets you not have to worry about
rolling-restarts for trivial code changes, and so adds stability to your
cluster. My warning is that you should not rely on it. If your cluster can't
survive a node being restarted for a code change, then it can't survive if that
node fails completely, or fails and comes back up. Design your system pretending
that hot-swapping does not exist, and only once you've done that allow yourself
to use it.
## GC sometimes needs a boost
Erlang garbage collection (GC) acts on a per-erlang-process basis, meaning that
each process decides on its own to garbage collect itself. This is nice because
it means stop-the-world isn't a problem, but it does have some interesting
effects.
We had a problem with our node memory graphs looking like an upwards facing
line, instead of a nice sinusoid relative to the number of connections during
the day. We couldn't find a memory leak *anywhere*, and so started profiling. We
found that the memory seemed to be comprised of mostly binary data in process
heaps. On a hunch my coworker Mike Cugini (who gets all the credit for this) ran
the following on a node:
```erlang
lists:foreach(fun erlang:garbage_collect/1, erlang:processes()).
```
and saw memory drop in a huge way. We made that code run every 10 minutes or so
and suddenly our memory problem went away.
The problem is that we had a lot of processes which individually didn't have
much heap data, but all-together were crushing the box. Each didn't think it had
enough to garbage collect very often, so memory just kept going up. Calling the
above forces all processes to garbage collect, and thus throw away all those
little binary bits they were hoarding.
## These aren't the solutions you are looking for
The `erl` process has tons of command-line options which allow you to tweak all
kinds of knobs. We've had tons of performance problems with our application, as
of yet not a single one has been solved by turning one of these knobs. They've
all been design issues or just run-of-the-mill bugs. I'm not saying the knobs
are *never* useful, but I haven't seen it yet.
## Erlang processes are great, except when they're not
The erlang model of allowing processes to manage global state works really well
in many cases. Possibly even most cases. There are, however, times when it
becomes a performance problem. This became apparent in the project I was working
on for Grooveshark, which was, at its heart, a pubsub server.
The architecture was very simple: each channel was managed by a process, client
connection processes subscribed to that channel and received publishes from it.
Easy right? The problem was that extremely high volume channels were simply not
able to keep up with the load. The channel process could do certain things very
fast, but there were some operations which simply took time and slowed
everything down. For example, channels could have arbitrary properties set on
them by their owners. Retrieving an arbitrary property from a channel was a
fairly fast operation: client `call`s the channel process, channel process
immediately responds with the property value. No blocking involved.
But as soon as there was any kind of call which required the channel process to
talk to yet *another* process (unfortunately necessary), things got hairy. On
high volume channels publishes/gets/set operations would get massively backed up
in the message queue while the process was blocked on another process. We tried
many things, but ultimately gave up on the process-per-channel approach.
We instead decided on keeping *all* channel state in a transactional database.
When client processes "call" operations on a channel, they are really just
acting on the database data inline, with no message passing involved. This means
that read-only operations are super-fast because there is minimal blocking, and
if some random other process is being slow it only affects the one client whose
call depends on it, rather than holding up a whole host of other clients.
## Mnesia might not be what you want
This one is probably a bit controversial, and definitely subject to use-cases.
Do your own testing and profiling, find out what's right for you.
Mnesia is erlang's solution for global state. It's an in-memory transactional
database which can scale to N nodes and persist to disk. It is hosted
directly in the erlang VM's memory, so you interact with it directly in your
erlang code; no calling out to database drivers and such. Sounds great,
right?
Unfortunately mnesia is not a very full-featured database. It is essentially a
key-value store which can hold arbitrary erlang data-types, albeit in a set
schema which you lay out for it during startup. This means that more complex
types like sorted sets and hash maps (although this was addressed with the
introduction of the map data-type in R17) are difficult to work with within
mnesia. Additionally, erlang's data model of immutability, while awesome
usually, can bite you here because it's difficult (impossible?) to pull out
chunks of data within a record without accessing the whole record.
For example, when retrieving the list of processes subscribed to a channel our
application doesn't simply pull the full list and iterate over it. This is too
slow, and in some cases the subscriber list was so large it wasn't actually
feasible. The channel process wasn't cleaning up its heap fast enough, so
multiple publishes would end up with multiple copies of the giant list in
memory. This became a problem. Instead we chain spawned processes, each of which
pulls a set chunk of the subscriber list and iterates over it. This is very
difficult to implement in mnesia without pulling the full subscriber list into
the process' memory at some point in the process.
It is, however, fairly trivial to implement in redis using sorted sets. For this
case, and many other cases after, the motto for performance improvements became
"stick it in redis". The application is at the point where *all* state which
isn't directly tied to a specific connection is kept in redis, encoded using
`term_to_binary`. The performance hit of going to an outside process for data
was actually much less than we'd originally thought, and ended up being a plus
since we had much more freedom to do interesting hacks to speed up our
accesses.

View File

@ -1,166 +0,0 @@
---
title: Rabbit Hole
description: >-
Complex systems sometimes require complex debugging.
tags: tech
---
We've begun rolling out [SkyDNS][skydns] at my job, which has been pretty neat.
We're basing a couple future projects around being able to use it, and it's made
dynamic configuration and service discovery nice and easy.
This post chronicles catching a bug because of our switch to SkyDNS, and how we
discover its root cause. I like to call these kinds of bugs "rabbit holes"; they
look shallow at first, but anytime you make a little progress forward a little
more is always required, until you discover the ending somewhere totally
unrelated to the start.
## The Bug
We are seeing *tons* of these in the SkyDNS log:
```
[skydns] Feb 20 17:21:15.168 INFO | no nameservers defined or name too short, can not forward
```
I fire up tcpdump to see if I can see anything interesting, and sure enough run
across a bunch of these:
```
# tcpdump -vvv -s 0 -l -n port 53
tcpdump: listening on eth0, link-type EN10MB (Ethernet), capture size 65535 bytes
...
$fen_ip.50257 > $skydns_ip.domain: [udp sum ok] 16218+ A? unknown. (25)
$fen_ip.27372 > $skydns_ip.domain: [udp sum ok] 16218+ A? unknown. (25)
$fen_ip.35634 > $skydns_ip.domain: [udp sum ok] 59227+ A? unknown. (25)
$fen_ip.64363 > $skydns_ip.domain: [udp sum ok] 59227+ A? unknown. (25)
```
It appears that some of our front end nodes (FENs) are making tons of DNS
requests trying to find the A record of `unknown`. Something on our FENs is
doing something insane and is breaking.
## The FENs
Hopping over to my favorite FEN we're able to see the packets in question
leaving in a tcpdump as well, but that's not helpful for finding the root cause.
We have lots of processes running on the FENs and any number of them could be
doing something crazy.
We fire up sysdig, which is similar to systemtap and strace in that it allows
you to hook into the kernel and view various kernel activities in real time, but
it's easier to use than either. The following command dumps all UDP packets being
sent and what process is sending them:
```
# sysdig fd.l4proto=udp
...
2528950 22:17:35.260606188 0 php-fpm (21477) < connect res=0 tuple=$fen_ip:61173->$skydns_ip:53
2528961 22:17:35.260611327 0 php-fpm (21477) > sendto fd=102(<4u>$fen_ip:61173->$skydns_ip:53) size=25 tuple=NULL
2528991 22:17:35.260631917 0 php-fpm (21477) < sendto res=25 data=.r...........unknown.....
2530470 22:17:35.261879032 0 php-fpm (21477) > ioctl fd=102(<4u>$fen_ip:61173->$skydns_ip:53) request=541B argument=7FFF82DC8728
2530472 22:17:35.261880574 0 php-fpm (21477) < ioctl res=0
2530474 22:17:35.261881226 0 php-fpm (21477) > recvfrom fd=102(<4u>$fen_ip:61173->$skydns_ip:53) size=1024
2530476 22:17:35.261883424 0 php-fpm (21477) < recvfrom res=25 data=.r...........unknown..... tuple=$skydns_ip:53->$fen_ip:61173
2530485 22:17:35.261888997 0 php-fpm (21477) > close fd=102(<4u>$fen_ip:61173->$skydns_ip:53)
2530488 22:17:35.261892626 0 php-fpm (21477) < close res=0
```
Aha! We can see php-fpm is requesting something over udp with the string
`unknown` in it. We've now narrowed down the guilty process, the rest should be
easy right?
## Which PHP?
Unfortunately we're a PHP shop; knowing that php-fpm is doing something on a FEN
narrows down the guilty codebase very little. Taking the FEN out of our load-balancer
stops the requests for `unknown`, so we *can* say that it's some user-facing
code that is the culprit. Our setup on the FENs involves users hitting nginx
for static content and nginx proxying PHP requests back to php-fpm. Since all
our virtual domains are defined in nginx, we are able to do something horrible.
On the particular FEN we're on we make a guess about which virtual domain the
problem is likely coming from (our main app), and proxy all traffic from all
other domains to a different FEN. We still see requests for `unknown` leaving
the box, so we've narrowed the problem down a little more.
## The Despair
Nothing in our code is doing any direct DNS calls as far as we can find, and we
don't see any places PHP might be doing it for us. We have lots of PHP
extensions in place, all written in C and all black boxes; any of them could be
the culprit. Grepping through the likely candidates' source code for the string
`unknown` proves fruitless.
We try xdebug at this point. xdebug is a profiler for php which will create
cachegrind files for the running code. With cachegrind you can see every
function which was ever called, how long was spent within each function, a full
call-graph, and lots more. Unfortunately xdebug outputs cachegrind files on a
per-php-fpm-process basis, and overwrites the previous file on each new request.
So xdebug is pretty much useless, since what is in the cachegrind file isn't
necessarily what spawned the DNS request.
## Gotcha (sorta)
We turn back to the tried and true method of dumping all the traffic using
tcpdump and perusing through that manually.
What we find is that nearly every time there is a DNS request for `unknown`, if
we scroll up a bit there is (usually) a particular request to memcache. The
requested key is always in the style of `function-name:someid:otherstuff`. When
looking in the code around that function name we find this ominous looking call:
```php
$ipAddress = getIPAddress();
$geoipInfo = getCountryInfoFromIP($ipAddress);
```
This points us in the right direction. On a hunch we add some debug
logging to print out the `$ipAddress` variable, and sure enough it comes back as
`unknown`. AHA!
So what we surmise is happening is that for some reason our geoip extension,
which we use to get the location data of an IP address and which
`getCountryInfoFromIP` calls, is seeing something which is *not* an IP address
and trying to resolve it.
## Gotcha (for real)
So the question becomes: why are we getting the string `unknown` as an IP
address?
Adding some debug logging around the area we find before showed that
`$_SERVER['REMOTE_ADDR']`, which is the variable populated with the IP address
of the client, is sometimes `unknown`. We guess that this has something to do
with some magic we are doing on nginx's side to populate `REMOTE_ADDR` with the
real IP address of the client in the case of them going through a proxy.
Many proxies send along the header `X-Forwarded-For` to indicate the real IP of
the client they're proxying for, otherwise the server would only see the proxy's
IP. In our setup I decided that in those cases we should set the `REMOTE_ADDR`
to the real client IP so our application logic doesn't even have to worry about
it. There are a couple problems with this which render it a bad decision, one
being that if some misbehaving proxy were to, say, start sending
`X-Forwarded-For: unknown`, then the application might mistake that to
mean the client's IP is `unknown`.
## The Fix
The fix here was two-fold:
1) We now always set `$_SERVER['REMOTE_ADDR']` to be the remote address of the
request, regardless of whether it's a proxy, and we also send the application the
`X-Forwarded-For` header to do with as it pleases.
2) Inside our app we look at all the headers sent and do some processing to
decide what the actual client IP is. PHP can handle a lot more complex logic
than nginx can, so we can do things like check to make sure the IP is an IP, and
also that it's not some NAT'd internal IP, and so forth (roughly sketched below).
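Just to illustrate the kind of check I mean, here's a rough sketch in Go rather
than the PHP we actually shipped; `realClientIP` and the list of private ranges
are just assumptions for the example:
```go
package main

import (
	"fmt"
	"net"
	"strings"
)

// realClientIP is a hypothetical helper: it returns the first valid, public IP
// listed in X-Forwarded-For, falling back to the direct remote address.
// Garbage values like the literal string "unknown" are simply skipped.
func realClientIP(remoteAddr, xForwardedFor string) string {
	privateBlocks := []string{"10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16", "127.0.0.0/8"}
	for _, f := range strings.Split(xForwardedFor, ",") {
		ip := net.ParseIP(strings.TrimSpace(f))
		if ip == nil {
			continue // not an IP at all, e.g. "unknown"
		}
		private := false
		for _, block := range privateBlocks {
			_, cidr, _ := net.ParseCIDR(block)
			if cidr.Contains(ip) {
				private = true
				break
			}
		}
		if !private {
			return ip.String()
		}
	}
	// no usable forwarded address, use the direct peer address instead
	if host, _, err := net.SplitHostPort(remoteAddr); err == nil {
		return host
	}
	return remoteAddr
}

func main() {
	fmt.Println(realClientIP("203.0.113.7:51234", "unknown, 10.0.0.5, 198.51.100.23"))
	// prints 198.51.100.23
}
```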
And that's it. From some weird log messages on our DNS servers to an nginx
mis-configuration on an almost unrelated set of servers, this is one of those
strange bugs that never has a nice solution and goes unsolved for a long time.
Spending the time to dive down the rabbit hole and find the answer is often
tedious, but also often very rewarding.
[skydns]: https://github.com/skynetservices/skydns

View File

@ -1,547 +0,0 @@
---
title: Go's http package by example
description: >-
The basics of using, testing, and composing apps built using go's net/http
package.
---
Go's [http](http://golang.org/pkg/net/http/) package has turned into one of my
favorite things about the Go programming language. Initially it appears to be
somewhat complex, but in reality it can be broken down into a couple of simple
components that are extremely flexible in how they can be used. This guide will
cover the basic ideas behind the http package, as well as examples in using,
testing, and composing apps built with it.
This guide assumes you have some basic knowledge of what an interface in Go is,
and some idea of how HTTP works and what it can do.
## Handler
The building block of the entire http package is the `http.Handler` interface,
which is defined as follows:
```go
type Handler interface {
ServeHTTP(ResponseWriter, *Request)
}
```
Once implemented the `http.Handler` can be passed to `http.ListenAndServe`,
which will call the `ServeHTTP` method on every incoming request.
`http.Request` contains all relevant information about an incoming http request
which is being served by your `http.Handler`.
The `http.ResponseWriter` is the interface through which you can respond to the
request. It implements the `io.Writer` interface, so you can use methods like
`fmt.Fprintf` to write a formatted string as the response body, or ones like
`io.Copy` to write out the contents of a file (or any other `io.Reader`). The
response code can be set before you begin writing data using the `WriteHeader`
method.
Here's an example of an extremely simple http server:
```go
package main
import (
"fmt"
"log"
"net/http"
)
type helloHandler struct{}
func (h helloHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "hello, you've hit %s\n", r.URL.Path)
}
func main() {
err := http.ListenAndServe(":9999", helloHandler{})
log.Fatal(err)
}
```
`http.ListenAndServe` serves requests using the handler, listening on the given
address:port. It will block until it encounters an error listening, in which
case we `log.Fatal`.
Here's an example of using this handler with curl:
```
~ $ curl localhost:9999/foo/bar
hello, you've hit /foo/bar
```
## HandlerFunc
Often defining a full type to implement the `http.Handler` interface is a bit
overkill, especially for extremely simple `ServeHTTP` functions like the one
above. The `http` package provides a helper function, `http.HandlerFunc`, which
wraps a function which has the signature
`func(w http.ResponseWriter, r *http.Request)`, returning an `http.Handler`
which will call it in all cases.
The following behaves exactly like the previous example, but uses
`http.HandlerFunc` instead of defining a new type.
```go
package main
import (
"fmt"
"log"
"net/http"
)
func main() {
h := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "hello, you've hit %s\n", r.URL.Path)
})
err := http.ListenAndServe(":9999", h)
log.Fatal(err)
}
```
## ServeMux
On their own, the previous examples don't seem all that useful. If we wanted to
have different behavior for different endpoints we would end up with having to
parse path strings as well as numerous `if` or `switch` statements. Luckily
we're provided with `http.ServeMux`, which does all of that for us. Here's an
example of it being used:
```go
package main
import (
"fmt"
"log"
"net/http"
)
func main() {
h := http.NewServeMux()
h.HandleFunc("/foo", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintln(w, "Hello, you hit foo!")
})
h.HandleFunc("/bar", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintln(w, "Hello, you hit bar!")
})
h.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(404)
fmt.Fprintln(w, "You're lost, go home")
})
err := http.ListenAndServe(":9999", h)
log.Fatal(err)
}
```
The `http.ServeMux` is itself an `http.Handler`, so it can be passed into
`http.ListenAndServe`. When it receives a request it will check if the request's
path is prefixed by any of its known paths, choosing the longest prefix match it
can find. We use the `/` endpoint as a catch-all to catch any requests to
unknown endpoints. Here are some examples of it being used:
```
~ $ curl localhost:9999/foo
Hello, you hit foo!
~ $ curl localhost:9999/bar
Hello, you hit bar!
~ $ curl localhost:9999/baz
You're lost, go home
```
`http.ServeMux` has both `Handle` and `HandleFunc` methods. These do the same
thing, except that `Handle` takes in an `http.Handler` while `HandleFunc` merely
takes in a function, implicitly wrapping it just as `http.HandlerFunc` does.
### Other muxes
There are numerous replacements for `http.ServeMux` like
[gorilla/mux](http://www.gorillatoolkit.org/pkg/mux) which give you things like
automatically pulling variables out of paths, easily asserting what http methods
are allowed on an endpoint, and more. Most of these replacements will implement
`http.Handler` like `http.ServeMux` does, and accept `http.Handler`s as
arguments, and so are easy to use in conjunction with the rest of the things
I'm going to talk about in this post.
## Composability
When I say that the `http` package is composable I mean that it is very easy to
create re-usable pieces of code and glue them together into a new working
application. The `http.Handler` interface is the way all pieces communicate with
each other. Here's an example of where we use the same `http.Handler` to handle
multiple endpoints, each slightly differently:
```go
package main
import (
"fmt"
"log"
"net/http"
)
type numberDumper int
func (n numberDumper) ServeHTTP(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "Here's your number: %d\n", n)
}
func main() {
h := http.NewServeMux()
h.Handle("/one", numberDumper(1))
h.Handle("/two", numberDumper(2))
h.Handle("/three", numberDumper(3))
h.Handle("/four", numberDumper(4))
h.Handle("/five", numberDumper(5))
h.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(404)
fmt.Fprintln(w, "That's not a supported number!")
})
err := http.ListenAndServe(":9999", h)
log.Fatal(err)
}
```
`numberDumper` implements `http.Handler`, and can be passed into the
`http.ServeMux` multiple times to serve multiple endpoints. Here it is in action:
```
~ $ curl localhost:9999/one
Here's your number: 1
~ $ curl localhost:9999/five
Here's your number: 5
~ $ curl localhost:9999/bazillion
That's not a supported number!
```
## Testing
Testing http endpoints is extremely easy in Go, and doesn't even require you to
actually listen on any ports! The `httptest` package provides a few handy
utilities, including `NewRecorder` which implements `http.ResponseWriter` and
allows you to effectively make an http request by calling `ServeHTTP` directly.
Here's an example of a test for our previously implemented `numberDumper`,
commented with what exactly is happening:
```go
package main
import (
"fmt"
"net/http"
"net/http/httptest"
. "testing"
)
func TestNumberDumper(t *T) {
// We first create the http.Handler we wish to test
n := numberDumper(1)
// We create an http.Request object to test with. The http.Request is
// totally customizable in every way that a real-life http request is, so
// even the most intricate behavior can be tested
r, _ := http.NewRequest("GET", "/one", nil)
    // httptest.ResponseRecorder implements the http.ResponseWriter interface, and as
// such can be passed into ServeHTTP to receive the response. It will act as
// if all data being given to it is being sent to a real client, when in
// reality it's being buffered for later observation
w := httptest.NewRecorder()
    // Pass in our httptest.ResponseRecorder and http.Request to our numberDumper. At
// this point the numberDumper will act just as if it was responding to a
// real request
n.ServeHTTP(w, r)
    // httptest.ResponseRecorder gives a number of fields and methods which can be used
// to observe the response made to our request. Here we check the response
// code
if w.Code != 200 {
t.Fatalf("wrong code returned: %d", w.Code)
}
// We can also get the full body out of the httptest.Recorder, and check
// that its contents are what we expect
body := w.Body.String()
if body != fmt.Sprintf("Here's your number: 1\n") {
t.Fatalf("wrong body returned: %s", body)
}
}
```
In this way it's easy to create tests for your individual components that you
are using to build your application, keeping the tests near to the functionality
they're testing.
Note: if you ever do need to spin up a test server in your tests, `httptest`
also provides a way to create a server listening on a random open port for use
in tests as well.
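In case it's useful, here's a rough sketch of what that looks like, reusing the
`numberDumper` handler from before. `TestNumberDumperServer` is just a name for
this example, and it assumes the same test file as above plus an `io/ioutil`
import:
```go
func TestNumberDumperServer(t *T) {
	// httptest.NewServer starts a real server on a random localhost port,
	// backed by whatever http.Handler we give it
	srv := httptest.NewServer(numberDumper(1))
	defer srv.Close()

	// srv.URL looks like "http://127.0.0.1:<port>", so we can hit it with a
	// normal client just like any other server
	resp, err := http.Get(srv.URL + "/one")
	if err != nil {
		t.Fatal(err)
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		t.Fatal(err)
	}
	if string(body) != "Here's your number: 1\n" {
		t.Fatalf("wrong body returned: %s", body)
	}
}
```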
## Middleware
Serving endpoints is nice, but often there's functionality you need to run for
*every* request before the actual endpoint's handler is run. For example, access
logging. A middleware component is one which implements `http.Handler`, but will
actually pass the request off to another `http.Handler` after doing some set of
actions. The `http.ServeMux` we looked at earlier is actually an example of
middleware, since it passes the request off to another `http.Handler` for actual
processing. Here's an example of our previous example with some logging
middleware:
```go
package main
import (
"fmt"
"log"
"net/http"
)
type numberDumper int
func (n numberDumper) ServeHTTP(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "Here's your number: %d\n", n)
}
func logger(h http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
log.Printf("%s requested %s", r.RemoteAddr, r.URL)
h.ServeHTTP(w, r)
})
}
func main() {
h := http.NewServeMux()
h.Handle("/one", numberDumper(1))
h.Handle("/two", numberDumper(2))
h.Handle("/three", numberDumper(3))
h.Handle("/four", numberDumper(4))
h.Handle("/five", numberDumper(5))
h.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(404)
fmt.Fprintln(w, "That's not a supported number!")
})
hl := logger(h)
err := http.ListenAndServe(":9999", hl)
log.Fatal(err)
}
```
`logger` is a function which takes in an `http.Handler` called `h`, and returns
a new `http.Handler` which, when called, will log the request it was called with
and then pass off its arguments to `h`. To use it we pass in our
`http.ServeMux`, so all incoming requests will first be handled by the logging
middleware before being passed to the `http.ServeMux`.
Here's an example log entry which is output when the `/five` endpoint is hit:
```
2015/06/30 20:15:41 [::1]:34688 requested /five
```
## Middleware chaining
Being able to chain middleware together is an incredibly useful ability which we
get almost for free, as long as we use the signature
`func(http.Handler) http.Handler`. A middleware component returns the same type
which is passed into it, so simply passing the output of one middleware
component into the other is sufficient.
However, more complex behavior with middleware can be tricky. For instance, what
if you want a piece of middleware which takes in a parameter upon creation?
Here's an example of just that, with a piece of middleware which will set a
header and its value for all requests:
```go
package main
import (
"fmt"
"log"
"net/http"
)
type numberDumper int
func (n numberDumper) ServeHTTP(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "Here's your number: %d\n", n)
}
func logger(h http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
log.Printf("%s requested %s", r.RemoteAddr, r.URL)
h.ServeHTTP(w, r)
})
}
type headerSetter struct {
key, val string
handler http.Handler
}
func (hs headerSetter) ServeHTTP(w http.ResponseWriter, r *http.Request) {
w.Header().Set(hs.key, hs.val)
hs.handler.ServeHTTP(w, r)
}
func newHeaderSetter(key, val string) func(http.Handler) http.Handler {
return func(h http.Handler) http.Handler {
return headerSetter{key, val, h}
}
}
func main() {
h := http.NewServeMux()
h.Handle("/one", numberDumper(1))
h.Handle("/two", numberDumper(2))
h.Handle("/three", numberDumper(3))
h.Handle("/four", numberDumper(4))
h.Handle("/five", numberDumper(5))
h.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(404)
fmt.Fprintln(w, "That's not a supported number!")
})
hl := logger(h)
hhs := newHeaderSetter("X-FOO", "BAR")(hl)
err := http.ListenAndServe(":9999", hhs)
log.Fatal(err)
}
```
And here's the curl output:
```
~ $ curl -i localhost:9999/three
HTTP/1.1 200 OK
X-Foo: BAR
Date: Wed, 01 Jul 2015 00:39:48 GMT
Content-Length: 22
Content-Type: text/plain; charset=utf-8
Here's your number: 3
```
`newHeaderSetter` returns a function which accepts and returns an
`http.Handler`. Calling that returned function with an `http.Handler` then gets
you an `http.Handler` which will set the header given to `newHeaderSetter`
before continuing on to the given `http.Handler`.
This may seem like a strange way of organizing this; for this example the
signature for `newHeaderSetter` could very well have looked like this:
```
func newHeaderSetter(key, val string, h http.Handler) http.Handler
```
And that implementation would have worked fine. But it would have been more
difficult to compose going forward. In the next section I'll show what I mean.
## Composing middleware with alice
[Alice](https://github.com/justinas/alice) is a very simple and convenient
helper for working with middleware using the function signature we've been using
thus far. Alice is used to create and use chains of middleware. Chains can even
be appended to each other, giving even further flexibility. Here's our previous
example with a couple more headers being set, but also using alice to manage the
added complexity.
```go
package main
import (
"fmt"
"log"
"net/http"
"github.com/justinas/alice"
)
type numberDumper int
func (n numberDumper) ServeHTTP(w http.ResponseWriter, r *http.Request) {
fmt.Fprintf(w, "Here's your number: %d\n", n)
}
func logger(h http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
log.Printf("%s requested %s", r.RemoteAddr, r.URL)
h.ServeHTTP(w, r)
})
}
type headerSetter struct {
key, val string
handler http.Handler
}
func (hs headerSetter) ServeHTTP(w http.ResponseWriter, r *http.Request) {
w.Header().Set(hs.key, hs.val)
hs.handler.ServeHTTP(w, r)
}
func newHeaderSetter(key, val string) func(http.Handler) http.Handler {
return func(h http.Handler) http.Handler {
return headerSetter{key, val, h}
}
}
func main() {
h := http.NewServeMux()
h.Handle("/one", numberDumper(1))
h.Handle("/two", numberDumper(2))
h.Handle("/three", numberDumper(3))
h.Handle("/four", numberDumper(4))
fiveHS := newHeaderSetter("X-FIVE", "the best number")
h.Handle("/five", fiveHS(numberDumper(5)))
h.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(404)
fmt.Fprintln(w, "That's not a supported number!")
})
chain := alice.New(
newHeaderSetter("X-FOO", "BAR"),
newHeaderSetter("X-BAZ", "BUZ"),
logger,
).Then(h)
err := http.ListenAndServe(":9999", chain)
log.Fatal(err)
}
```
In this example all requests will have the headers `X-FOO` and `X-BAZ` set, but
the `/five` endpoint will *also* have the `X-FIVE` header set.
## Fin
Starting with a simple idea of an interface, the `http` package allows us to
create for ourselves an incredibly useful and flexible (yet still rather simple)
ecosystem for building web apps with re-usable components, all without breaking
our static checks.

View File

@ -1,236 +0,0 @@
---
title: Happy Trees
description: >-
Visualizing a forest of happy trees.
tags: tech art
---
Source code related to this post is available [here](https://github.com/mediocregopher/happy-tree).
This project was inspired by [this video](https://www.youtube.com/watch?v=_DpzAvb3Vk4),
which you should watch first in order to really understand what's going on.
My inspiration came from his noting that happification could be done on numbers
in bases other than 10. I immediately thought of hexadecimal, base-16, since I'm
a programmer and that's what I think of. I also was trying to think of how one
would graphically represent a large happification tree, when I realized that
hexadecimal numbers are colors, and colors graphically represent things nicely!
## Colors
To computers, colors are represented using 3 bytes, encompassing red, green, and
blue. Each byte is represented by two hexadecimal digits, and they are appended
together. For example `FF0000` represents maximum red (`FF`) added to no green
and no blue. `FF5500` represents maximum red (`FF`), some green (`55`) and no
blue (`00`), which when added together results in kind of an orange color.
## Happifying colors
In base 10, happifying a number is done by splitting its digits, squaring each
one individually, and adding the resulting numbers. The principle works the same
for hexadecimal numbers:
```
A4F
A*A + 4*4 + F*F
64 + 10 + E1
155 // 341 in decimal
```
So if all colors are 6-digit hexadecimal numbers, they can be happified easily!
```
FF5500
F*F + F*F + 5*5 + 5*5 + 0*0 + 0*0
E1 + E1 + 19 + 19 + 0 + 0
0001F4
```
So `FF5500` (an orangish color) happifies to `0001F4` (a darker blue). Since
order of digits doesn't matter, `5F50F0` also happifies to `0001F4`. From this
fact, we can make a tree (hence the happification tree). I can do this process
on every color from `000000` (black) to `FFFFFF` (white), so I will!
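Just to make that concrete, here's a tiny sketch of the idea in Go. The linked
project isn't necessarily written this way; `happify` is just an illustrative
name and implementation:
```go
package main

import "fmt"

// happify squares each hex digit of c and sums the results.
func happify(c uint32) uint32 {
	var sum uint32
	for ; c > 0; c /= 16 {
		d := c % 16
		sum += d * d
	}
	return sum
}

func main() {
	fmt.Printf("%06X\n", happify(0xFF5500)) // prints 0001F4
	fmt.Printf("%06X\n", happify(0x5F50F0)) // same digits, same result: 0001F4
}
```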
## Representing the tree
So I know I can represent the tree using color, but there's more to decide on
than that. The easy way to represent a tree would be to simply draw a literal
tree graph, with a circle for each color and lines pointing to its parent and
children. But this is boring, and also if I want to represent *all* colors the
resulting image would be enormous and/or unreadable.
I decided on using a hollow, multi-level pie-chart. Using the example
of `000002`, it would look something like this:
![An example of a partial multi-level pie chart](/img/happy-tree/partial.png)
The inner arc represents the color `000002`. The second arc represents the 15
different colors which happify into `000002`, each of them may also have their
own outer arc of numbers which happify to them, and so on.
This representation is nice because a) it looks cool and b) it allows the
melancoils of the hexadecimals to be placed around the happification tree
(the numbers which happify into `000001`), which is convenient. It's also somewhat
easier to code than a circle/branch based tree diagram.
An important feature I had to implement was proportional slice sizes. If I were
to give each child of a color an equal size on that arc's edge the image would
simply not work. Some branches of the tree are extremely deep, while others are
very shallow. If all were given the same space, those deep branches wouldn't
even be representable by a single pixel's width, and would simply fail to show
up. So I implemented proportional slice sizes, where the size of every slice is
determined to be proportional to how many total (recursively) children it has.
You can see this in the above example, where the second level arc is largely
comprised of one giant slice, with many smaller slices taking up the end.
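The sizing rule itself is simple. Here's an illustrative Go sketch: the
`children` map, from a color to the colors which happify to it, is an assumption
of this example, and cycle links (like `000000` pointing at itself) are assumed
to be excluded so the recursion terminates:
```go
// subtreeSize returns the number of colors that color c is "responsible" for:
// itself plus all of its recursive children. A slice's angular width is then
// made proportional to this count.
func subtreeSize(children map[uint32][]uint32, c uint32) int {
	size := 1
	for _, child := range children[c] {
		size += subtreeSize(children, child)
	}
	return size
}
```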
## First attempt
My first attempt resulted in this image (click for 5000x5000 version):
[![Result of first attempt](/img/happy-tree/happy-tree-atmp1-small.png)](/img/happy-tree/happy-tree-atmp1.png)
The first thing you'll notice is that it looks pretty neat.
The second thing you'll notice is that there's actually only one melancoil in
the 6-digit hexadecimal number set. The innermost black circle is `000000` which
only happifies to itself, and nothing else will happify to it (sad `000000`).
The second circle represents `000001`, and all of its runty children. And
finally the melancoil, comprised of:
```
00000D -> 0000A9 -> 0000B5 -> 000092 -> 000055 -> 000032 -> ...
```
The final thing you'll notice (or maybe it was the first, since it's really
obvious) is that it's very blue. Non-blue colors are really only represented as
leaves on their trees and don't ever really have any children of their own, so
the blue and black sections take up vastly more space.
This makes sense. The number which should generate the largest happification
result, `FFFFFF`, only results in `000546`, which is primarily blue. So in effect
all colors happify to some shade of blue.
This might have been it, technically this is the happification tree and the
melancoil of 6 digit hexadecimal numbers represented as colors. But it's also
boring, and I wanted to do better.
## Second attempt
The root of the problem is that the definition of "happification" I used
resulted in not diverse enough results. I wanted something which would give me
numbers where any of the digits could be anything. Something more random.
I considered using a hash instead, like md5, but that has its own problems.
There's no guarantee that any number would actually reach `000001`, which isn't
required but it's a nice feature that I wanted. It also would be unlikely that
there would be any melancoils that weren't absolutely gigantic.
I ended up redefining what it meant to happify a hexadecimal number. Instead of
adding all the digits up, I first split up the red, green, and blue digits into
their own numbers, happified those numbers, and finally reassembled the results
back into a single number. For example:
```
FF5500
FF, 55, 00
F*F + F*F, 5*5 + 5*5, 0*0 + 0*0
1C2, 32, 00
C23200
```
I drop that 1 on the `1C2`, because it has no place in this system. Sorry 1.
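In terms of the little Go sketch from earlier, the new rule looks something like
this; `happifyRGB` is again just an illustrative name, and masking each
channel's result with `0xFF` is what drops that leading 1:
```go
// happifyRGB happifies each color channel on its own and recombines them,
// keeping only the low byte of each channel's result.
// e.g. happifyRGB(0xFF5500) == 0xC23200
func happifyRGB(c uint32) uint32 {
	r := happify((c>>16)&0xFF) & 0xFF
	g := happify((c>>8)&0xFF) & 0xFF
	b := happify(c&0xFF) & 0xFF
	return r<<16 | g<<8 | b
}
```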
Simply replacing that function resulted in this image (click for 5000x5000 version):
[![Result of second attempt](/img/happy-tree/happy-tree-atmp2-small.png)](/img/happy-tree/happy-tree-atmp2.png)
The first thing you notice is that it's so colorful! So that goal was achieved.
The second thing you notice is that there's *significantly* more melancoils.
Hundreds, even. Here's a couple of the melancoils (each on its own line):
```
00000D -> 0000A9 -> 0000B5 -> 000092 -> 000055 -> 000032 -> ...
000D0D -> 00A9A9 -> 00B5B5 -> 009292 -> 005555 -> 003232 -> ...
0D0D0D -> A9A9A9 -> B5B5B5 -> 929292 -> 555555 -> 323232 -> ...
0D0D32 -> A9A90D -> B5B5A9 -> 9292B5 -> 555592 -> 323255 -> ...
...
```
And so on. You'll notice the first melancoil listed is the same as the one from
the first attempt. You'll also notice that the same numbers from that
melancoil are "re-used" in the rest of them as well. The second coil listed is
the same as the first, just with the numbers repeated in the 3rd and 4th digits.
The third coil has those numbers repeated once more in the 1st and 2nd digits.
The final coil is the same numbers, but with the 5th and 6th digits offset one
place in the rotation.
The rest of the melancoils in this attempt work out to just be every conceivable
iteration of the above. This is simply a property of the algorithm chosen, and
there's not a whole lot we can do about it.
## Third attempt
After talking with [Mr. Marco](/members/#marcopolo) about the previous attempts
I got an idea that would lead me towards more attempts. The main issue I was
having in coming up with new happification algorithms was figuring out what to
do about getting a number greater than `FFFFFF`. Dropping the leading digits
just seemed.... lame.
One solution I came up with was to simply happify again. And again, and again.
Until I got a number less than or equal to `FFFFFF`.
With this new plan, I could increase the power by which I'm raising each
individual digit, and drop the strategy from the second attempt of splitting the
number into three parts. In the first attempt I was doing happification to the
power of 2, but what if I wanted to happify to the power of 6? It would look
something like this (starting with the number `34BEEF`):
```
34BEEF
3^6 + 4^6 + B^6 + E^6 + E^6 + F^6
2D9 + 1000 + 1B0829 + 72E440 + 72E440 + ADCEA1
1AEB223
1AEB223 is greater than FFFFFF, so we happify again
1^6 + A^6 + E^6 + B^6 + 2^6 + 2^6 + 3^6
1 + F4240 + 72E440 + 1B0829 + 40 + 40 + 2D9
9D3203
```
So `34BEEF` happifies to `9D3203`, when happifying to the power of 6.
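Continuing the illustrative Go sketch, the repeat-until-it-fits rule might look
like this (`happifyPow` is just a name for the example; note, as I'll get to
below, that at the 7th power some inputs never drop back under `FFFFFF`, so the
loop wouldn't terminate for them):
```go
// happifyPow raises each hex digit of c to the p'th power and sums the
// results, re-happifying until the sum fits back into 6 hex digits.
// e.g. happifyPow(0x34BEEF, 6) == 0x9D3203
func happifyPow(c uint64, p uint) uint64 {
	for {
		var sum uint64
		for m := c; m > 0; m /= 16 {
			d := m % 16
			// compute d^p by repeated multiplication
			dp := uint64(1)
			for i := uint(0); i < p; i++ {
				dp *= d
			}
			sum += dp
		}
		if sum <= 0xFFFFFF {
			return sum
		}
		c = sum
	}
}
```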
As mentioned before, the first attempt in this post was the 2nd power tree;
here are the trees for the 3rd, 4th, 5th, and 6th powers (each image is a link to
a larger version):
3rd power:
[![Third attempt, 3rd power](/img/happy-tree/happy-tree-atmp3-pow3-small.png)](/img/happy-tree/happy-tree-atmp3-pow3.png)
4th power:
[![Third attempt, 4th power](/img/happy-tree/happy-tree-atmp3-pow4-small.png)](/img/happy-tree/happy-tree-atmp3-pow4.png)
5th power:
[![Third attempt, 5th power](/img/happy-tree/happy-tree-atmp3-pow5-small.png)](/img/happy-tree/happy-tree-atmp3-pow5.png)
6th power:
[![Third attempt, 6th power](/img/happy-tree/happy-tree-atmp3-pow6-small.png)](/img/happy-tree/happy-tree-atmp3-pow6.png)
A couple things to note:
* 3-5 are still very blue. It's not till the 6th power that the distribution
becomes random enough to become very colorful.
* Some powers have more coils than others. Power of 3 has a lot, and actually a
lot of them aren't coils, but single narcissistic numbers. Narcissistic
numbers are those which happify to themselves. `000000` and `000001` are
narcissistic numbers in all powers, power of 3 has quite a few more.
* 4 looks super cool.
Using unsigned 64-bit integers I could theoretically go up to the power of 15.
But I hit a roadblock at power of 7, in that there's actually a melancoil which
occurs whose members are all greater than `FFFFFF`. This means that my strategy
of repeating happifying until I get under `FFFFFF` doesn't work for any numbers
which lead into that coil.

View File

@ -1,105 +0,0 @@
---
title: Brian Bars
description: >-
Cheap and easy to make, healthy, vegan, high-carb, high-protein. "The Good
Stuff".
updated: 2018-01-18
---
It actually blows my mind it's been 4 years since I used this blog. It was
previously a tech blog, but then I started putting all my tech-related posts on
[the cryptic blog](https://cryptic.io). As of now this is a lifestyle/travel
blog. The me of 4 years ago would be horrified.
Now I just have to come up with a lifestyle and do some traveling.
## Recipe
This isn't a real recipe because I'm not going to preface it with my entire
fucking life story. Let's talk about the food.
Brian bars:
* Are like Clif Bars, but with the simplicity of ingredients that Larabars have.
* Are easy to make, only needing a food processor (I use a magic bullet) and a
  stovetop and an oven.
* Keep for a long time and don't really need refrigerating (but don't mind it
neither)
* Are paleo, vegan, gluten-free, free-range, grass-fed, whatever...
* Are really really filling.
* Are named after me, deal with it.
I've worked on this recipe for a bit, trying to make it workable, and will
probably keep adjusting it (and this post) as time goes on.
### Ingredients
Nuts and seeds. Most of this recipe is nuts and seeds. Here's the ones I used:
* 1 cup almonds
* 1 cup peanuts
* 1 cup walnuts
* 1 cup coconut flakes/shavings/whatever
* 1/2 cup flax seeds
* 1/2 cup sesame seeds
For all of those above it doesn't _really_ matter what nuts/seeds you use, it's
all gonna get ground up anyway. So whatever's cheap works fine. Also, avoid
salt-added ones if you can.
The other ingredients are:
* 1 cup raisins/currants
* 1.5 lbs of pitted dates (no added sugar! you don't need it!)
* 2 cups oats
### Grind up the nuts
Throw the nuts into the food processor and grind them into a powder. Then throw
that powder into a bowl along with the seeds, coconuts, raisins, and oats, and
mix em good.
I don't _completely_ grind up the nuts, instead leaving some chunks in it here
and there, but you do you.
### Prepare the dates
This is the harder part, and is what took me a couple tries to get right. The
best strategy I've found is to steam the dates a bit over a stove to soften
them. Then, about a cup at a time, you can throw them in the food processor and
turn them into a paste. You may have to add a little water if your processor is
having trouble.
Once processed you can add the dates to the mix from before and stir it all up.
It'll end up looking something like cookie dough. Except unlike cookie dough
it's completely safe to eat and maybe sorta healthy.
### Bake it, Finish it
Put the dough stuff in a pan of some sort, flatten it out, and stick it in the
oven at like 250 or 300 for a few hours. You're trying to cook out the water you
added earlier when you steamed the dates, as well as whatever little moisture
the dates had in the first place.
Once thoroughly baked you can stick the pan in the fridge to cool and keep,
and/or cut it up into individual bars. Keep in mind that the bars are super
filling and allow for pretty small portions. Wrap em in foil or plastic wrap and
take them to-go, or keep them around for a snack. Or both. Or whatever you want
to do, it's your food.
### Cleanup
Dates are simultaneously magical and the most annoying thing to work with, so
there's cleanup problems you may run into with them:
Protip #1: When cleaning your processed date slime off of your cooking utensils
I'd recommend just letting them soak in water for a while. Dry-ish date slime
will stick to everything, while soaked date slime will come right off.
Protip #2: Apparently if you want ants, dates are a great way to get ants. My
apartment has never had an ant problem until 3 hours after I made a batch of
these and didn't wipe down my counter enough. I'm still dealing with the ants.
Apparently there's environmentally friendly ant poisons where the ants happily
carry the poison back into the nest and the whole nest eats it and dies. Which
feels kinda mean in some way, but is also pretty clever and they're just ants
anyway so fuck it.

View File

@ -1,293 +0,0 @@
---
title: Rethinking Identity
description: >-
A more useful way of thinking about identity on the internet, and using that
to build a service which makes our online life better.
tags: tech
---
In my view, the major social media platforms (Facebook, Twitter, Instagram,
etc...) are broken. They worked well at small scales, but billions of people are
now exposed to them, and [Murphy's Law][murphy] has come into effect. The weak
points in the platforms have been found and exploited, to the point where
they're barely usable for interacting with anyone you don't already know in
person.
[murphy]: https://en.wikipedia.org/wiki/Murphy%27s_law
On the other hand, social media, at its core, is a powerful tool that humans
have developed, and it's not one to be thrown away lightly (if it can be thrown
away at all). It's worthwhile to try and fix it. So that's what this post is
about.
A lot of moaning and groaning has already been done on how social media is toxic
for the average person. But the average person isn't doing anything more than
receiving and reacting to their environment. If that environment is toxic, the
person in it becomes so as well. It's certainly possible to filter the toxicity
out, and use a platform to your own benefit, but that takes work on the user's
part. It would be nice to think that people will do more than follow the path of
least resistance, but at scale that's simply not how reality is, and people
shouldn't be expected to do that work.
To identify what has become toxic about the platforms, first we need to identify
what a non-toxic platform would look like.
The ideal definition for social media is to give people a place to socialize
with friends, family, and the rest of the world. Defining "socialize" is tricky,
and probably an exercise only a socially awkward person who doesn't do enough
socializing would undertake. "Expressing one's feelings, knowledge, and
experiences to other people, and receiving theirs in turn" feels like a good
approximation. A platform where true socializing was the only activity would be
ideal.
Here are some trends on our social media which have nothing to do with
socializing: artificially boosted follower numbers on Instagram to obtain
product sponsors, shills in Reddit comments boosting a product or company,
Russian trolls on Twitter spreading propaganda, trolls everywhere being dicks
and switching IPs when they get banned, and [that basketball president whose
wife used burner Twitter accounts to trash talk players][president].
[president]: https://www.nytimes.com/2018/06/07/sports/bryan-colangelo-sixers-wife.html
These are all examples of how anonymity can be abused on social media. I want
to say up front that I'm _not_ against anonymity on the internet, and that I
think we can have our cake and eat it too. But we _should_ acknowledge the
direct and indirect problems anonymity causes. We can't trust that anyone on
social media is being honest about who they are and what their motivation is.
This problem extends outside of social media too, to Amazon product reviews (and
basically any other review system), online polls and raffles, multiplayer games,
and surely many other cases.
## Identity
To fix social media, and other large swaths of the internet, we need to rethink
identity. This process started for me a long time ago, when I watched [this TED
talk][identity], which discusses ways in which we misunderstand identity.
Crucially, David Birch points out that identity is not a name, it's more
fundamental than that.
[identity]: https://www.ted.com/talks/david_birch_identity_without_a_name
In the context of online platforms, where a user creates an account which
identifies them in some way, identity breaks down into 3 distinct problems
which are often conflated:
* Authentication: Is this identity owned by this person?
* Differentiation: Is this identity unique to this person?
* Authorization: Is this identity allowed to do X?
For internet platform developers, authentication has been given the full focus.
Blog posts, articles, guides, and services abound which deal with properly
hashing and checking passwords, two factor authentication, proper account
recovery procedure, etc... While authentication is not a 100% solved problem,
it's had the most work done on it, and the problems which this post deals with
are not affected by it.
The problem which should instead be focused on is differentiation.
## Differentiation
I want to make very clear, once more, that I am _not_ in favor of de-anonymizing
the web, and doing so is not what I'm proposing.
Differentiation is without a doubt the most difficult identity problem to solve.
It's not even clear that it's solvable offline. Take this situation: you are in
a room, and you are told that one person is going to walk in, then leave, then
another person will do the same. These two persons may or may not be the same
person. You're allowed to do anything you like to each person (with their
consent) in order to determine if they are the same person or not.
For the vast, vast majority of cases you can simply look with your eyeballs and
see if they are different people. But this will not work 100% of the time.
Identical twins are an obvious example of two persons looking like one, but a
malicious actor with a disguise might be one person posing as two. Biometrics
like fingerprints, iris scanning, and DNA testing fail for many reasons (the
identical twin case being one). You could attempt to give the first a unique
marking on their skin, but who's to say they don't have a solvent, which can
clean that marking off, waiting right outside the door?
The solutions and refutations can continue on pedantically for some time, but
the point is that there is likely not a 100% solution, and even the 90%
solutions require significant investment. Differentiation is a hard problem,
which most developers don't want to solve. Most are fine with surrogates like
checking that an email or phone number is unique to the platform, but these
aren't enough to stop a dedicated individual or organization.
### Roll Your Own Differentiation
If a platform wants to roll their own solution to the differentiation problem, a
proper solution, it might look something like this:
* Submit an image of your passport, or other government issued ID. This would
have to be checked against the appropriate government agency to ensure the
ID is legitimate.
* Submit an image of your face, alongside a written note containing a code given
by the platform. Software to detect manipulated images would need to be
employed, as well as reverse image searching to ensure the image isn't being
reused.
* Once completed, all data needs to be hashed/fingerprinted and then destroyed,
so sensitive data isn't sitting around on servers, but can still be checked
  against future users signing up for the platform (see the sketch below).
* A dedicated support team would be needed to handle edge-cases and mistakes.
None of these is trivial, nor would I trust an up-and-coming platform which is
being bootstrapped out of a basement to implement any of them correctly.
Additionally, going through with this process would be a _giant_ point of
friction for a user creating a new account; they likely would go use a different
platform instead, which didn't have all this nonsense required.
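To make the hash-and-destroy step from the list above a bit more concrete, here's
a minimal Go sketch of what the fingerprinting might look like. Everything in it
(the field names, the HMAC construction, the in-memory store) is an assumption
made up for illustration, not a description of how any real platform does it.
```go
package main

import (
	"crypto/hmac"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"strings"
)

// fingerprint normalizes the fields of a government ID and HMACs them with a
// server-side secret. Only the resulting digest would be stored; the raw ID
// data can then be destroyed while still allowing future signups to be checked
// against it.
func fingerprint(secret []byte, idNumber, fullName, birthDate string) string {
	normalized := strings.ToLower(strings.Join(
		[]string{idNumber, fullName, birthDate}, "|",
	))
	mac := hmac.New(sha256.New, secret)
	mac.Write([]byte(normalized))
	return hex.EncodeToString(mac.Sum(nil))
}

func main() {
	secret := []byte("server-side secret, kept out of the database")

	// seenFingerprints stands in for whatever persistent store the platform
	// uses; only digests live here, never the raw ID data.
	seenFingerprints := map[string]bool{}

	fp := fingerprint(secret, "X1234567", "Jane Doe", "1990-01-01")
	if seenFingerprints[fp] {
		fmt.Println("this person has already registered")
	} else {
		seenFingerprints[fp] = true
		fmt.Println("new person registered")
	}
}
```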
### Differentiation as a Service
This is the crux of this post.
Instead of each platform rolling their own differentiation, what if there was a
service for it. Users would still have to go through the hassle described above,
but only once forever, and on a more trustable site. Then platforms, no matter
what stage of development they're at, could use that service to ensure that
their community of users is free from the problems of fake accounts and trolls.
This is what the service would look like:
* A user would have to, at some point, have gone through the steps above to
create an account on the differentiation-as-a-service (DaaS) platform. This
account would have the normal authentication mechanisms that most platforms
do (password, two-factor, etc...).
* When creating an account on a new platform, the user would login to their DaaS
account (similar to the common "login with Google/Facebook/Twitter" buttons).
* The DaaS then returns an opaque token, an effectively random string which
uniquely identifies that user, to the platform. The platform can then check in
its own user database for any other users using that token, and know if the
user already has an account. All of this happens without any identifying
information being passed to the platform.
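For concreteness, here's a minimal Go sketch of the platform's side of that flow:
the opaque token is simply used as a uniqueness key when creating accounts. The
type names and the in-memory storage are assumptions made up for the sketch, not
part of any real DaaS.
```go
package main

import (
	"errors"
	"fmt"
)

// User is a platform account. DaaSToken is the opaque string returned by the
// hypothetical DaaS login, stored alongside the account.
type User struct {
	Name      string
	DaaSToken string
}

var errAlreadyRegistered = errors.New("an account with this DaaS token already exists")

// registerUser refuses to create a second account backed by the same token,
// without ever learning who the person behind the token is.
func registerUser(usersByToken map[string]User, name, daasToken string) error {
	if _, ok := usersByToken[daasToken]; ok {
		return errAlreadyRegistered
	}
	usersByToken[daasToken] = User{Name: name, DaaSToken: daasToken}
	return nil
}

func main() {
	usersByToken := map[string]User{}

	// Two signups presenting the same opaque token, i.e. the same person.
	fmt.Println(registerUser(usersByToken, "alice", "tok-8f3a"))  // <nil>
	fmt.Println(registerUser(usersByToken, "alice2", "tok-8f3a")) // error
}
```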
Similar to how many sites outsource to Cloudflare to handle DDoS protection,
which is better handled en masse by people familiar with the problem, the DaaS
allows for outsourcing the problem of differentiation. Users are more likely to
trust an established DaaS service than a random website they're signing up for.
And signing up for a DaaS is a one-time event, so if enough platforms are using
the DaaS it could become worthwhile for them to do so.
Finally, since the DaaS also handles authentication, a platform could outsource
that aspect of identity management to it as well. This is optional for the
platform, but for smaller platforms which are just starting up it might be
worthwhile to save that development time.
### Traits of a Successful DaaS
It's possible for me to imagine a world where use of DaaS' is common, but
bridging the gap between that world and this one is not as obvious. Still, I
think it's necessary if the internet is to ever evolve past being, primarily,
a home for trolls. There are a number of traits of an up-and-coming DaaS which
would aid it in being accepted by the internet:
* **Patience**: there is a critical mass of users and platforms using DaaS'
where it becomes more advantageous for platforms to use the DaaS than not.
Until then, the DaaS and platforms using it need to take deliberate but small
steps. For example: making DaaS usage optional for platform users, and giving
their accounts special marks to indicate they're "authentic" (like Twitter's
blue checkmark); giving those users' activity higher weight in algorithms;
allowing others to filter out activity of non-"authentic" users; etc... These
are all preliminary steps which can be taken which encourage but don't require
platform users to use a DaaS.
* **User-friendly**: most likely the platforms using a DaaS are what are going
to be paying the bills. A successful DaaS will need to remember that, no
matter where the money comes from, if the users aren't happy they'll stop
using the DaaS, and platforms will be forced to switch to a different one or
stop using them altogether. User-friendliness means more than a nice
interface; it means actually caring for the users' interests, taking their
privacy and security seriously, and in all other aspects being on their side.
In that same vein, competition is important, and so...
* **No country/government affiliation**: If the DaaS was to be run by a
government agency it would have no incentive to provide a good user
experience, since the users aren't paying the bills (they might not even be in
that country). A DaaS shouldn't be exclusive to any one government or country
anyway. Perhaps it starts out that way, to get off the ground, but ultimately
the internet is a global institution, and is healthiest when it's connecting
individuals _around the world_. A successful DaaS will reach beyond borders
and try to connect everyone.
Obviously actually starting a DaaS would be a huge undertaking, and would
require proper management and good developers and all that, but such things
apply to most services.
## Authorization
The final aspect of identity management, which I haven't talked about yet, is
authorization. This aspect deals with what a particular identity is allowed to
do. For example, is an identity allowed to claim they have a particular name, or
are from a particular place, or are of a particular age? Other things like
administration and moderation privileges also fall under authorization, but they
are generally defined and managed within a platform.
A DaaS has the potential to help with authorization as well, though with a giant
caveat. If a DaaS were to not fingerprint and destroy the user's data, like
their name and birthday and whatnot, but instead store them, then the following
use-case could also be implemented:
* A platform wants to know if a user is above a certain age, let's say. It asks
the DaaS for that information.
* The DaaS asks the user, OAuth style, whether the user is ok with giving the
platform that information.
* If so, the platform is given that information.
This is a tricky situation. It adds a lot of liability for the user, since their
raw data will be stored with the DaaS, ripe for hacking. It also places a lot of
trust with the DaaS to be responsible with users' data and not go giving it out
willy-nilly to others, and instead to only give out the bare-minimum that the
user allows. Since the user is not the DaaS' direct customer, this might be too
much to ask. Nevertheless, it's a use-case which is worth thinking about.
## Dapps
The idea of decentralized applications, or dapps, has begun to gain traction.
While not mainstream yet, I think they have potential, and it's necessary to
discuss how a DaaS would operate in a world where the internet is no longer
hosted in central datacenters.
Consider an Ethereum-based dapp. If a user were to register one ethereum address
(which is derived from a public key) with their DaaS account, the following use-case
could be implemented:
* A charity dapp has an ethereum contract, which receives a call from an
ethereum address asking for money. The dapp wants to ensure every person it
sends money to hasn't received any that day.
* The DaaS has a separate ethereum contract it manages, where it stores all
addresses which have been registered to a user. There is no need to keep any
other user information in the contract.
* The charity dapp's contract calls the DaaS' contract, asking it if the address
is one of its addresses. If so, and if the charity contract hasn't given to
that address yet today, it can send money to that address.
There would perhaps need to be some mechanism by which a user could change their
address, which would be complex since that address might be in use by a dapp
already, but it's likely a solvable problem.
A charity dapp is a bit of a silly example; ideally with a charity dapp there'd
also be some mechanism to ensure a person actually _needs_ the money. But
there's other dapp ideas which would become feasible, due to the inability of a
person to impersonate many people, if DaaS use becomes normal.
## Why Did I Write This?
Perhaps you've gotten this far and are asking: "Clearly you've thought about
this a lot, why don't you make this yourself and make some phat stacks of cash
with a startup?" The answer is that this project would need to be started and
run by serious people, who can be dedicated and thorough and responsible. I'm
not sure I'm one of those people; I get distracted easily. But I would like to
see this idea tried, and so I've written this up thinking maybe someone else
would take the reins.
I'm not asking for equity or anything, if you want to try; it's a free idea for
the taking. But if it turns out to be a bazillion dollar Good Idea™, I won't say
no to a donation...

View File

@ -1,55 +0,0 @@
---
title: >-
Visualization 1
description: >-
Using clojurescript and quil to generate interesting visuals
series: viz
git_repo: https://github.com/mediocregopher/viz.git
git_commit: v1
tags: tech art
---
First I want to apologize if you've seen this already; I originally had this up
on my normal website, but I've decided to instead consolidate all my work into
my blog.
This is the first of a series of visualization posts I intend to work on, each
building from the previous one.
<script src="/assets/viz/1/goog/base.js"></script>
<script src="/assets/viz/1/cljs_deps.js"></script>
<script>goog.require("viz.core");</script>
<p align="center"><canvas id="viz"></canvas></p>
This visualization follows a few simple rules:
* Any point can only be occupied by a single node. A point may be alive (filled)
or dead (empty).
* On every tick each live point picks from 0 to N new points to spawn, where N is
the number of empty adjacent points to it. If it picks 0, it becomes dead.
* Each line indicates the parent of a point. Lines have an arbitrary lifetime of
a few ticks, and occupy the points they connect (so new points may not spawn
on top of a line).
* When a dead point has no lines it is cleaned up, and its point is no longer
occupied.
The resulting behavior is somewhere between [Conway's Game of
Life](https://en.wikipedia.org/wiki/Conway%27s_Game_of_Life) and white noise.
Though each point operates independently, they tend to move together in groups.
When two groups collide head on they tend to cancel each other out, killing most
of both. When they meet while both heading in a common direction they tend to
peacefully merge towards that direction.
Sometimes their world becomes so cluttered there's hardly room to move.
Sometimes a major coincidence of events leads to multiple groups canceling each
other at once, opening up the world and allowing for an explosion of new growth.
Some groups spiral about a single point, sustaining themselves and defending
from outside groups in the same movement. This doesn't last for very long.
The performance of this visualization is not very optimized, and will probably
eat up your CPU like nothing else. Most of the slowness comes from drawing the
lines; since there's so many individual small ones it's quite cumbersome to do.

View File

@ -1,50 +0,0 @@
---
title: >-
Visualization 2
description: >-
Now in glorious technicolor!
series: viz
git_repo: https://github.com/mediocregopher/viz.git
git_commit: v2
tags: tech art
---
<script src="/assets/viz/2/goog/base.js"></script>
<script src="/assets/viz/2/cljs_deps.js"></script>
<script>goog.require("viz.core");</script>
<p align="center"><canvas id="viz"></canvas></p>
This visualization builds on the previous. Structurally the cartesian grid has
been turned into an isometric one, but this is more of an environmental change
than a behavioral one.
Behavioral changes which were made:
* When a live point is deciding its next spawn points, it first sorts the set of
empty adjacent points from closest-to-the-center to farthest. It then chooses
  a number `n` between `0` and `N` (where `N` is the sorted set's size) and
spawns new points from the first `n` points of the sorted set. `n` is chosen
based on:
* The live point's linear distance from the center.
* A random multiplier.
* Each point is spawned with an attached color, where the color chosen is a
slightly different hue than its parent. The change is deterministic, so all
child points of the same generation have the same color.
The second change is purely cosmetic, but does create a mesmerizing effect. The
first change alters the behavior dramatically. Only the points which compete for
the center are able to reproduce, but by the same token are more likely to be
starved out by other points doing the same.
In the previous visualization the points moved around in groups aimlessly. Now
the groups are all competing for the same thing, the center. As a result they
congregate and are able to be viewed as a larger whole.
The constant churn of the whole takes many forms, from a spiral in the center,
to waves crashing against each other, to outright chaos, to random purges of
nearly all points. Each form lasts for only a few seconds before giving way to
another.

View File

@ -1,588 +0,0 @@
---
title: >-
Program Structure and Composability
description: >-
Discussing the nature of program structure, the problems presented by
complex structures, and a pattern that helps in solving those problems.
tags: tech
---
## Part 0: Introduction
This post is focused on a concept I call “program structure,” which I will try
to shed some light on before discussing complex program structures. I will then
discuss why complex structures can be problematic to deal with, and will finally
discuss a pattern for dealing with those problems.
My background is as a backend engineer working on large projects that have had
many moving parts; most had multiple programs interacting with each other, used
many different databases in various contexts, and faced large amounts of load
from millions of users. Most of this post will be framed from my perspective,
and will present problems in the way I have experienced them. I believe,
however, that the concepts and problems I discuss here are applicable to many
other domains, and I hope those with a foot in both backend systems and a second
domain can help to translate the ideas between the two.
Also note that I will be using Go as my example language, but none of the
concepts discussed here are specific to Go. To that end, I've decided to favor
readable code over “correct” code, and so have elided things that most gophers
hold near-and-dear, such as error checking and proper documentation, in order to
make the code as accessible as possible to non-gophers as well. As with before,
I trust that someone with a foot in Go and another language can help me
translate between the two.
## Part 1: Program Structure
In this section I will discuss the difference between directory and program
structure, show how global state is antithetical to compartmentalization (and
therefore good program structure), and finally discuss a more effective way to
think about program structure.
### Directory Structure
For a long time, I thought about program structure in terms of the hierarchy
present in the filesystem. In my mind, a program's structure looked like this:
```
// The directory structure of a project called gobdns.
src/
config/
dns/
http/
ips/
persist/
repl/
snapshot/
main.go
```
What I grew to learn was that this conflation of “program structure” with
“directory structure” is ultimately unhelpful. While it can't be denied that
every program has a directory structure (and if not, it ought to), this does not
mean that the way the program looks in a filesystem in any way corresponds to
how it looks in our mind's eye.
The most notable way to show this is to consider a library package. Here is the
structure of a simple web-app which uses redis (my favorite database) as a
backend:
```
src/
redis/
http/
main.go
```
If I were to ask you, based on that directory structure, what the program does
in the most abstract terms, you might say something like: “The program
establishes an http server that listens for requests. It also establishes a
connection to the redis server. The program then interacts with redis in
different ways based on the http requests that are received on the server.”
And that would be a good guess. Here's a diagram that depicts the program
structure, wherein the root node, `main.go`, takes in requests from `http` and
processes them using `redis`.
{% include image.html
dir="program-structure" file="diag1.jpg" width=519
descr="Example 1"
%}
This is certainly a viable guess for how a program with that directory
structure operates, but consider another answer: “A component of the program
called `server` establishes an http server that listens for requests. `server`
also establishes a connection to a redis server. `server` then interacts with
that redis connection in different ways based on the http requests that are
received on the http server. Additionally, `server` tracks statistics about
these interactions and makes them available to other components. The root
component of the program establishes a connection to a second redis server, and
stores those statistics in that redis server.” Here's another diagram to depict
_that_ program.
{% include image.html
dir="program-structure" file="diag2.jpg" width=712
descr="Example 2"
%}
The directory structure could apply to either description; `redis` is just a
library which allows for interaction with a redis server, but it doesn't
specify _which_ or _how many_ servers. However, those are extremely important
factors that are definitely reflected in our concept of the program's
structure, and not in the directory structure. **What the directory structure
reflects are the different _kinds_ of components available to use, but it does
not reflect how a program will use those components.**
### Global State vs Compartmentalization
The directory-centric view of structure often leads to the use of global
singletons to manage access to external resources like RPC servers and
databases. In examples 1 and 2 the `redis` library might contain code which
looks something like this:
```go
// A mapping of connection names to redis connections.
var globalConns = map[string]*RedisConn{}
func Get(name string) *RedisConn {
if globalConns[name] == nil {
globalConns[name] = makeRedisConnection(name)
}
return globalConns[name]
}
```
Even though this pattern would work, it breaks with our conception of the
program structure in more complex cases like example 2. Rather than the `redis`
component being owned by the `server` component, which actually uses it, it
would be practically owned by _all_ components, since all are able to use it.
Compartmentalization has been broken, and can only be held together through
sheer human discipline.
**This is the problem with all global state. It is shareable among all
components of a program, and so is accountable to none of them.** One must look
at an entire codebase to understand how a globally held component is used,
which might not even be possible for a large codebase. Therefore, the
maintainers of these shared components rely entirely on the discipline of their
fellow coders when making changes, usually discovering where that discipline
broke down once the changes have been pushed live.
Global state also makes it easier for disparate programs/components to share
datastores for completely unrelated tasks. In example 2, rather than creating a
new redis instance for the root component's statistics storage, the coder might
have instead said, “well, there's already a redis instance available, I'll just
use that.” And so, compartmentalization would have been broken further. Perhaps
the two instances _could_ be coalesced into the same instance for the sake of
resource efficiency, but that decision would be better made at runtime via the
configuration of the program, rather than being hardcoded into the code.
From the perspective of team management, global state-based patterns do nothing
except slow teams down. The person/team responsible for maintaining the central
library in which shared components live (`redis`, in the above examples)
becomes the bottleneck for creating new instances for new components, which
will further lead to re-using existing instances rather than creating new ones,
further breaking compartmentalization. Additionally the person/team responsible
for the central library, rather than the team using it, often finds themselves
as the maintainers of the shared resource.
### Component Structure
So what does proper program structure look like? In my mind the structure of a
program is a hierarchy of components, or, in other words, a tree. The leaf
nodes of the tree are almost _always_ IO related components, e.g., database
connections, RPC server frameworks or clients, message queue consumers, etc.
The non-leaf nodes will _generally_ be components that bring together the
functionalities of their children in some useful way, though they may also have
some IO functionality of their own.
Let's look at an even more complex structure, still only using the `redis` and
`http` component types:
{% include image.html
dir="program-structure" file="diag3.jpg" width=729
descr="Example 3"
%}
This component structure contains the addition of the `debug` component.
Clearly the `http` and `redis` components are reusable in different contexts,
but for this example the `debug` endpoint is as well. It creates a separate
http server that can be queried to perform runtime debugging of the program,
and can be tacked onto virtually any program. The `rest-api` component is
specific to this program and is therefore not reusable. Let's dive into it a
bit to see how it might be implemented:
```go
// RestAPI is very much not thread-safe, hopefully it doesn't have to handle
// more than one request at once.
type RestAPI struct {
redisConn *redis.RedisConn
httpSrv *http.Server
// Statistics exported for other components to see
RequestCount int
FooRequestCount int
BarRequestCount int
}
func NewRestAPI() *RestAPI {
r := new(RestAPI)
r.redisConn = redis.NewConn("127.0.0.1:6379")
// mux will route requests to different handlers based on their URL path.
mux := http.NewServeMux()
mux.HandleFunc("/foo", r.fooHandler)
mux.HandleFunc("/bar", r.barHandler)
r.httpSrv = http.NewServer(mux)
// Listen for requests and serve them in the background.
go r.httpSrv.Listen(":8000")
return r
}
func (r *RestAPI) fooHandler(rw http.ResponseWriter, req *http.Request) {
r.redisConn.Command("INCR", "fooKey")
r.RequestCount++
r.FooRequestCount++
}
func (r *RestAPI) barHandler(rw http.ResponseWriter, req *http.Request) {
r.redisConn.Command("INCR", "barKey")
r.RequestCount++
r.BarRequestCount++
}
```
In that snippet `rest-api` coalesced `http` and `redis` into a simple REST-like
api using pre-made library components. `main.go`, the root component, does much
the same:
```go
func main() {
// Create debug server and start listening in the background
debugSrv := debug.NewServer()
// Set up the RestAPI, this will automatically start listening
restAPI := NewRestAPI()
// Create another redis connection and use it to store statistics
statsRedisConn := redis.NewConn("127.0.0.1:6380")
for {
time.Sleep(1 * time.Second)
statsRedisConn.Command("SET", "numReqs", restAPI.RequestCount)
statsRedisConn.Command("SET", "numFooReqs", restAPI.FooRequestCount)
statsRedisConn.Command("SET", "numBarReqs", restAPI.BarRequestCount)
}
}
```
One thing that is clearly missing in this program is proper configuration,
whether from command-line or environment variables, etc. As it stands, all
configuration parameters, such as the redis addresses and http listen
addresses, are hardcoded. Proper configuration actually ends up being somewhat
difficult, as the ideal case would be for each component to set up its own
configuration variables without its parent needing to be aware. For example,
`redis` could set up `addr` and `pool-size` parameters. The problem is that there
are two `redis` components in the program, and their parameters would therefore
conflict with each other. An elegant solution to this problem is discussed in
the next section.
## Part 2: Components, Configuration, and Runtime
The key to the configuration problem is to recognize that, even if there are
two of the same component in a program, they can't occupy the same place in the
program's structure. In the above example, there are two `http` components: one
under `rest-api` and the other under `debug`. Because the structure is
represented as a tree of components, the “path” of any node in the tree
uniquely represents it in the structure. For example, the two `http` components
in the previous example have these paths:
```
root -> rest-api -> http
root -> debug -> http
```
If each component were to know its place in the component tree, then it would
easily be able to ensure that its configuration and initialization didn't
conflict with other components of the same type. If the `http` component sets
up a command-line parameter to know what address to listen on, the two `http`
components in that program would set up:
```
--rest-api-listen-addr
--debug-listen-addr
```
So how can we enable each component to know its path in the component structure?
To answer this, we'll have to take a detour through a type called `Component`.
### Component and Configuration
The `Component` type is a made-up type (though you'll be able to find an
implementation of it at the end of this post). It has a single primary purpose,
and that is to convey the program's structure to new components.
To see how this is done, let's look at a couple of `Component`'s methods:
```go
// Package mcmp
// New returns a new Component which has no parents or children. It is therefore
// the root component of a component hierarchy.
func New() *Component
// Child returns a new child of the called upon Component.
func (*Component) Child(name string) *Component
// Path returns the Component's path in the component hierarchy. It will return
// an empty slice if the Component is the root component.
func (*Component) Path() []string
```
`Child` is used to create a new `Component`, corresponding to a new child node
in the component structure, and `Path` is used to retrieve the path of any
`Component` within that structure. For the sake of keeping the examples simple,
let's pretend these functions have been implemented in a package called `mcmp`.
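For concreteness, a bare-bones implementation of those three methods might look
something like the following. This is only a sketch to ground the examples; the
real `mcmp` package linked at the end of this post does quite a bit more.
```go
// Package mcmp (minimal sketch, not the real implementation)

type Component struct {
	name     string
	parent   *Component
	children []*Component
}

// New returns a new Component which has no parents or children.
func New() *Component { return new(Component) }

// Child returns a new child of the called upon Component.
func (c *Component) Child(name string) *Component {
	child := &Component{name: name, parent: c}
	c.children = append(c.children, child)
	return child
}

// Path returns the Component's path in the component hierarchy, or an empty
// slice if the Component is the root.
func (c *Component) Path() []string {
	if c.parent == nil {
		return nil
	}
	return append(c.parent.Path(), c.name)
}
```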
Here's an example of how `Component` might be used in the `redis` component's
code:
```go
// Package redis
func NewConn(cmp *mcmp.Component, defaultAddr string) *RedisConn {
cmp = cmp.Child("redis")
paramPrefix := strings.Join(cmp.Path(), "-")
addrParam := flag.String(paramPrefix+"-addr", defaultAddr, "Address of redis instance to connect to")
// finish setup
return redisConn
}
```
In our above example, the two `redis` components' parameters would be:
```
// This first parameter is for the stats redis, whose parent is the root and
// therefore doesn't have a prefix. Perhaps stats should be broken into its own
// component in order to fix this.
--redis-addr
--rest-api-redis-addr
```
`Component` definitely makes it easier to instantiate multiple redis components
in our program, since it allows them to know their place in the component
structure.
Having to construct the prefix for the parameters ourselves is pretty annoying,
so let's introduce a new package, `mcfg`, which acts like `flag` but is aware
of `Component`. Then `redis.NewConn` is reduced to:
```go
// Package redis
func NewConn(cmp *mcmp.Component, defaultAddr string) *RedisConn {
cmp = cmp.Child("redis")
addrParam := mcfg.String(cmp, "addr", defaultAddr, "Address of redis instance to connect to")
// finish setup
return redisConn
}
```
Easy-peasy.
#### But What About Parse?
Sharp-eyed gophers will notice that there is a key piece missing: When is
`flag.Parse`, or its `mcfg` counterpart, called? When does `addrParam` actually
get populated? It can't happen inside `redis.NewConn` because there might be
other components after `redis.NewConn` that want to set up parameters. To
illustrate the problem, let's look at a simple program that wants to set up two
`redis` components:
```go
func main() {
// Create the root Component, an empty Component.
cmp := mcmp.New()
// Create the Components for two sub-components, foo and bar.
cmpFoo := cmp.Child("foo")
cmpBar := cmp.Child("bar")
// Now we want to try to create a redis sub-component for each component.
// This will set up the parameter "--foo-redis-addr", but bar hasn't had a
// chance to set up its corresponding parameter, so the command-line can't
// be parsed yet.
fooRedis := redis.NewConn(cmpFoo, "127.0.0.1:6379")
// This will set up the parameter "--bar-redis-addr", but, as mentioned
// before, redis.NewConn can't parse command-line.
barRedis := redis.NewConn(cmpBar, "127.0.0.1:6379")
// It is only after all components have been instantiated that the
// command-line arguments can be parsed
mcfg.Parse()
}
```
While this solves our argument parsing problem, `fooRedis` and `barRedis` are not
usable yet because the actual connections have not been made. This is a classic
chicken and the egg problem. The func `redis.NewConn` needs to make a connection
which it cannot do until _after_ `mcfg.Parse` is called, but `mcfg.Parse` cannot
be called until after `redis.NewConn` has returned. We will solve this problem
in the next section.
### Instantiation vs Initialization
Let's break down `redis.NewConn` into two phases: instantiation and
initialization. Instantiation refers to creating the component on the component
structure and having it declare what it needs in order to initialize (e.g.,
configuration parameters). During instantiation, nothing external to the
program is performed; no IO, no reading of the command-line, no logging, etc.
All that's happened is that the empty template of a `redis` component has been
created.
Initialization is the phase during which the template is filled in.
Configuration parameters are read, startup actions like the creation of database
connections are performed, and logging is output for informational and debugging
purposes.
The key to making effective use of this dichotomy is to allow _all_ components
to instantiate themselves before they initialize themselves. By doing this we
can ensure, for example, that all components have had the chance to declare
their configuration parameters before configuration parsing is done.
So let's modify `redis.NewConn` so that it follows this dichotomy. It makes
sense to leave instantiation-related code where it is, but we need a mechanism
by which we can declare initialization code before actually calling it. For
this, I will introduce the idea of a “hook.”
#### But First: Augment Component
In order to support hooks, however, `Component` will need to be augmented with
a few new methods. Right now, it can only carry with it information about the
component structure, but here we will add the ability to carry arbitrary
key/value information as well:
```go
// Package mcmp
// SetValue sets the given key to the given value on the Component, overwriting
// any previous value for that key.
func (*Component) SetValue(key, value interface{})
// Value returns the value which has been set for the given key, or nil if the
// key was never set.
func (*Component) Value(key interface{}) interface{}
// Children returns the Component's children in the order they were created.
func (*Component) Children() []*Component
```
The final method allows us to, starting at the root `Component`, traverse the
component structure and interact with each `Component`'s key/value store. This
will be useful for implementing hooks.
#### Hooks
A hook is simply a function that will run later. We will declare a new package,
calling it `mrun`, and say that it has two new functions:
```go
// Package mrun
// InitHook registers the given hook to the given Component.
func InitHook(cmp *mcmp.Component, hook func())
// Init runs all hooks registered using InitHook. Hooks are run in the order
// they were registered.
func Init(cmp *mcmp.Component)
```
With these two functions, we are able to defer the initialization phase of
startup by using the same `Components` we were passing around for the purpose
of denoting component structure.
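As a rough sketch of how this could work (the real `mrun` package linked at the
end of this post is more involved), `InitHook` and `Init` can be built directly
on top of the `SetValue`, `Value`, and `Children` methods introduced above,
stashing hooks in each `Component`'s key/value store and walking the tree to run
them. This assumes the `mcmp` declarations from earlier in the post.
```go
// Package mrun (minimal sketch, not the real implementation)

// hookKey is a private key type, so no other package's keys can collide with
// the one used here to store hooks.
type hookKey int

// InitHook registers the given hook to the given Component.
func InitHook(cmp *mcmp.Component, hook func()) {
	hooks, _ := cmp.Value(hookKey(0)).([]func())
	cmp.SetValue(hookKey(0), append(hooks, hook))
}

// Init runs all hooks registered using InitHook. It runs a Component's own
// hooks in the order they were registered, then descends into its children in
// the order they were created.
func Init(cmp *mcmp.Component) {
	hooks, _ := cmp.Value(hookKey(0)).([]func())
	for _, hook := range hooks {
		hook()
	}
	for _, child := range cmp.Children() {
		Init(child)
	}
}
```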
Now, with these few extra pieces of functionality in place, let's reconsider the
most recent example, and make a program that creates two redis components which
exist independently of each other:
```go
// Package redis
// NOTE that NewConn has been renamed to InstConn, to reflect that the returned
// *RedisConn is merely instantiated, not initialized.
func InstConn(cmp *mcmp.Component, defaultAddr string) *RedisConn {
cmp = cmp.Child("redis")
// we instantiate an empty RedisConn instance and parameters for it. Neither
// has been initialized yet. They will remain empty until initialization has
// occurred.
redisConn := new(RedisConn)
addrParam := mcfg.String(cmp, "addr", defaultAddr, "Address of redis instance to connect to")
mrun.InitHook(cmp, func() {
// This hook will run after parameter initialization has happened, and
// so addrParam will be usable. Once this hook has run, redisConn will be
// usable as well.
*redisConn = makeRedisConnection(*addrParam)
})
// Now that cmp has had configuration parameters and initialization hooks
// set into it, return the empty redisConn instance back to the parent.
return redisConn
}
```
```go
// Package main
func main() {
// Create the root Component, an empty Component.
cmp := mcmp.New()
// Create the Components for two sub-components, foo and bar.
cmpFoo := cmp.Child("foo")
cmpBar := cmp.Child("bar")
// Add redis components to each of the foo and bar sub-components.
redisFoo := redis.InstConn(cmpFoo, "127.0.0.1:6379")
redisBar := redis.InstConn(cmpBar, "127.0.0.1:6379")
// Parse will descend into the Component and all of its children,
// discovering all registered configuration parameters and filling them from
// the command-line.
mcfg.Parse(cmp)
// Now that configuration parameters have been initialized, run the Init
// hooks for all Components.
mrun.Init(cmp)
// At this point the redis components have been fully initialized and may be
// used. For this example we'll copy all keys from one to the other.
keys := redisFoo.Command("KEYS", "*")
for i := range keys {
val := redisFoo.Command("GET", keys[i])
redisBar.Command("SET", keys[i], val)
}
}
```
## Conclusion
While the examples given here are fairly simplistic, the pattern itself is quite
powerful. Codebases naturally accumulate small, domain-specific behaviors and
optimizations over time, especially around the IO components of the program.
Databases are used with specific options that an organization finds useful,
logging is performed in particular places, metrics are counted around certain
pieces of code, etc.
By programming with component structure in mind, we are able to keep these
optimizations while also keeping the clarity and compartmentalization of the
code intact. We can keep our code flexible and configurable, while also
re-usable and testable. Also, the simplicity of the tools involved means they
can be extended and retrofitted for nearly any situation or use-case.
Overall, this is a powerful pattern that I've found myself unable to do without
once I began using it.
### Implementation
As a final note, you can find an example implementation of the packages
described in this post here:
* [mcmp](https://godoc.org/github.com/mediocregopher/mediocre-go-lib/mcmp)
* [mcfg](https://godoc.org/github.com/mediocregopher/mediocre-go-lib/mcfg)
* [mrun](https://godoc.org/github.com/mediocregopher/mediocre-go-lib/mrun)
The packages are not stable and are likely to change frequently. You'll also
find that they have been extended quite a bit from the simple descriptions found
here, based on what I've found useful as I've implemented programs using
component structures. With these two points in mind, I would encourage you to
look and take whatever functionality you find useful for yourself, and not use
the packages directly. The core pieces are not different from what has been
described in this post.

View File

@ -1,56 +0,0 @@
---
title: >-
Trading in the Rain
description: >-
All those... gains... will be lost like... tears...
tags: tech art crypto
---
<!-- MIDI.js -->
<!-- polyfill -->
<script src="/assets/trading-in-the-rain/MIDI.js/inc/shim/Base64.js" type="text/javascript"></script>
<script src="/assets/trading-in-the-rain/MIDI.js/inc/shim/Base64binary.js" type="text/javascript"></script>
<script src="/assets/trading-in-the-rain/MIDI.js/inc/shim/WebAudioAPI.js" type="text/javascript"></script>
<!-- MIDI.js package -->
<script src="/assets/trading-in-the-rain/MIDI.js/js/midi/audioDetect.js" type="text/javascript"></script>
<script src="/assets/trading-in-the-rain/MIDI.js/js/midi/gm.js" type="text/javascript"></script>
<script src="/assets/trading-in-the-rain/MIDI.js/js/midi/loader.js" type="text/javascript"></script>
<script src="/assets/trading-in-the-rain/MIDI.js/js/midi/plugin.audiotag.js" type="text/javascript"></script>
<script src="/assets/trading-in-the-rain/MIDI.js/js/midi/plugin.webaudio.js" type="text/javascript"></script>
<script src="/assets/trading-in-the-rain/MIDI.js/js/midi/plugin.webmidi.js" type="text/javascript"></script>
<!-- utils -->
<script src="/assets/trading-in-the-rain/MIDI.js/js/util/dom_request_xhr.js" type="text/javascript"></script>
<script src="/assets/trading-in-the-rain/MIDI.js/js/util/dom_request_script.js" type="text/javascript"></script>
<!-- / MIDI.js -->
<script src="/assets/trading-in-the-rain/Distributor.js" type="text/javascript"></script>
<script src="/assets/trading-in-the-rain/MusicBox.js" type="text/javascript"></script>
<script src="/assets/trading-in-the-rain/RainCanvas.js" type="text/javascript"></script>
<script src="/assets/trading-in-the-rain/CW.js" type="text/javascript"></script>
<script src="/assets/trading-in-the-rain/SeriesComposer.js" type="text/javascript"></script>
<script src="/assets/trading-in-the-rain/main.js" type="text/javascript"></script>
<div id="tradingInRainModal">
For each pair listed below, live trade data will be pulled down from the
<a href="https://docs.cryptowat.ch/websocket-api/">Cryptowat.ch Websocket
API</a> and used to generate musical rain drops. The price of each trade
determines both the musical note and position of the rain drop on the screen,
while the volume of each trade determines how long the note is held and how big
the rain drop is.
<p id="markets">Pairs to be generated, by color:<br/><br/></p>
<button id="button" onclick="run()">Click Here to Begin</button>
<p id="progress"></p>
<script type="text/javascript">
fillMarketP();
if (window.addEventListener) window.addEventListener("load", autorun, false);
else if (window.attachEvent) window.attachEvent("onload", autorun);
else window.onload = autorun;
</script>
</div>
<canvas id="rainCanvas" style=""></canvas>

View File

@ -1,161 +0,0 @@
---
title: >-
Denver Protests
description: >-
Craziness
---
# Saturday, May 30th
We went to the May 30th protest at Civic Center Park. We were there for a few
hours during the day, leaving around 4pm. I would describe the character of the
protest as being energetic, angry, but contained. A huge crowd moved in and
around civic center, chanting and being rowdy, but clearly was being led.
After a last hurrah at the pavilion it seemed that the organized event was
"over". We stayed a while longer, and eventually headed back home. I don't feel
that people really left the park at the same time we did; mostly everyone just
dispersed around the park and found somewhere to keep hanging out.
Tonight there has been an 8pm curfew. The police lined up on the north side of
the park, armored and clearly ready for action. We watched all of this on the
live news stations, gritting our teeth through the commentary of their reporters.
As the police stood there, the clock counting down to 8, the protesters grew
more and more irritated. They taunted the police, and formed a line of their
own. The braver (or more dramatic) protesters walked around in the no-man's land
between them, occasionally earning themselves some teargas.
The police began pushing forward a little just before 8, but began pushing in
earnest just after 8, after the howling. They would advance, wait, advance, wait
again. An armada of police cars, ambulance, and fire trucks followed the line as
it advanced.
The police did not give the protesters anywhere to go except into Capital Hill,
southeast of Civic Center Park. We watched as a huge crowd marched past the
front of our house, chanting their call and response: "What's his name?" "GEORGE
FLOYD". The feeling wasn't of violence still, just anger. Indignant at a curfew
aimed at quelling a movement, the protesters simply kept moving. The police were
never far behind.
We sat on our front stoop with our neighbors and watched the night unfold. I
don't think a single person in our building or the buildings to the left and
right of us hadn't gone to protest today in some capacity. We came back from our
various outings and sat out front, watching the crowds and patrolling up and
down the street to keep tabs on things.
Around 9pm the fires started. We saw them on the news, and in person. They were
generally dumpster fires, generally placed such that they were away from
buildings, clearly being done more to be annoying than to accomplish anything
specific. A very large set of fires was started a block south of us, in the
middle of the street. The fire department was there within a few minutes to put
those out, before moving on.
From the corner of my eye, sitting back on the stoop, I noticed our neighbors
running into their backyard. We ran after them, and they told us there was a
dumpster fire in our alley. They were running with fire extinguishers, and we
ran inside to grab some of our own. By the time we got to the backyard the fire
was only smouldering, and the fire department was coming down the alley. We
scurried back into the backyard. A few minutes later I peeked my head around the
corner, into the alley, to see what was happening. I was greeted by at least two
police in riot gear, guarding the dumpster as the fire department worked. They
saw me but didn't move, and I quickly retreated back to the yard.
Talking to our neighbor later we found out she had seen a group of about 10
people back there, and watched them jump the fence into another backyard in
order to escape the alley. She thinks they, or some subset of them, started the
fire. She looked one in the eye, she says, and didn't get the impression they
were trying to cause damage, just to make a statement.
The fires stopped not long after that, it seems. We're pretty sure the fire
trucks were just driving up and down the main roads, looking into alleys and
stopping all fires they could find. In all this time the police didn't do much.
They would hold a line, but never chase anyone. Even now, as I write this around
midnight, people are still out, meandering around in small groups, and police
are present but not really doing anything.
It's hard to get a good view of everything though. All we have is livestreams on
youtube to go on at this point. There's a couple intrepid amateur reporters out
there, getting into the crowds and streaming events as they happen. Right now
we're watching people moving down Lincoln towards Civic Center Park, some of
them trying to smash windows of buildings as they go.
The violence of these protests is going to be the major story of tonight, I know
that already. That I know of there's been 3 police injured, some broken
windows, and quite a bit of graffiti. I do believe the tactic of pushing
everyone into Cap Hill had the desired effect of reducing looting (again, as far
as I can tell so far), but at the expense of those who live here who have to
endure latent tear gas, dumpster fires, and sirens all through the night.
Even now, at midnight, from what I can see from my porch and from these live
streams, the protesters are not violent. At worst they are guilty of a lot of
loitering. The graffiti, the smashed windows, the injured officers, all of these
things will be held up as examples of the anarchy and violence inherent to the
protesters. But I don't think that's an honest picture. The vast, vast majority
of those out right now are civilly disobeying an unjust curfew, trying to keep
the energy of the movement alive.
My thoughts about these things are complicated. When turning a corner on the
street I'm far more afraid to see the police than to see other protesters. The
fires have been annoying, and stupid, and unhelpful, but were never threatening.
The violence is stupid, though I don't shed many tears for a looted Chili's or
Papa Johns. The police have actually shown more restraint than I expected in all
of this, though funneling the protest into a residential neighborhood was an
incredibly stupid move. Could the protesters not have just stayed in the park?
Yes, the park would likely have been turned into an encampment, but it was
already heading into that direction due to Covid-19. Overall, this night didn't
need to be so hard, but Denver handled this well.
But, it's only 1am, and the night has a long way to go. Things could still get
worse. Even now I'm watching people trying to break into the supreme court
building. Civic Center Park appears to be very populated again, and the police
are very present there again. It's possible I may eat my words.
# Monday, June 1st
Yesterday was quite a bit more tame than the craziness Saturday. I woke up
Sunday morning feeling antsy, and rode my bike around to see the damage. I had a
long conversation with a homeless man named Gary in Civic Center Park. He was
pissed, and had a lot to say about the "suburban kids" destroying the park he
and many others live in, causing it to be shut down and tear gassed. The
protesters saw it as a game, according to him, but it was life and death for the
homeless; three of his guys got beat up in the street, and neither police nor
protesters stopped it.
Many people had shown up to the park early to help clean it up. Apart from the
graffiti, which was also in the process of being cleaned, it was hard to tell
anything had actually happened. Gary had some words about them as well, that
they were only there for the gram and some pats on the back, but once they left
his life would be back as it was. I could feel that, but I also appreciated that
people were cognizant that damage was being done and were willing to do
something about it.
I rode around 16th street mall, down colfax, and back up 13th, looking to see if
anything had happened. For the most part there was no damage, save the graffiti.
A Mediterranean restaurant got its windows smashed, as well as the Office Depot.
The restaurant was unfortunate, Office Depot will be ok.
The protest yesterday was much more peaceful. The cops were nowhere to be found
when curfew hit, but did eventually show up when the protest moved down Colfax.
They had lined the streets around their precinct building there, but for the
most part the protesters just kept walking. This is when the "violence" started.
The cops moved into the street, forming a line across Colfax behind the
protesters. Police cars and vans started moving. As the protest turned back,
presumably to head back to the capitol lawn, it ran into the riot line.
Predictably, everyone scattered. The cat-and-mouse game had begun, which meant
dumpster fires, broken windows, tear gas, and all the rest. Watching the whole
thing it was extremely clear to us, though not the newscasters, unfortunately,
that if the police hadn't moved out into Colfax nothing would have ever
happened. Instead, the newscasters lamented that people were bringing things
like helmets, gas masks, traffic cones, shields, etc... and so were clearly not there
"for the right reasons".
The thing that the newscasters couldn't seem to grasp was that the police
attempting to control these situations are what are catalyzing them in the first
place. These are protests _against_ the police, they cannot take place under the
terms the police choose. If the police were not here setting terms, but instead
working with the peaceful protesters (the vast, vast majority) to quell the
violence, no one would be here with helmets, gas masks, traffic cones,
shields... But instead the protesters feel they need to protect themselves in
order to be heard, and the police feel they have to exercise their power to
maintain control, and so the situation degrades.

View File

@ -1,155 +0,0 @@
---
title: >-
Visualization 3
description: >-
All the pixels.
series: viz
tags: tech art
---
<canvas id="canvas" style="padding-bottom: 2rem;"></canvas>
This visualization is built from the ground up. On every frame a random set of
pixels is chosen. Each chosen pixel calculates the average of its color and the
color of a random neighbor. Some random color drift is added in as well. It
replaces its own color with that calculated color.
Choosing a neighbor is done using the "asteroid rule", ie a pixel at the very
top row is considered to be the neighbor of the pixel on the bottom row of the
same column.
Without the asteroid rule the pixels would all eventually converge into a single
uniform color, generally a light blue, due to the colors at the edge, the reds,
being quickly averaged away. With the asteroid rule in place the canvas has no
edges, thus no position on the canvas is favored and balance can be maintained.
<script type="text/javascript">
let rectSize = 12;
function randn(n) {
return Math.floor(Math.random() * n);
}
let canvas = document.getElementById("canvas");
canvas.width = window.innerWidth - (window.innerWidth % rectSize);
canvas.height = window.innerHeight- (window.innerHeight % rectSize);
let ctx = canvas.getContext("2d");
let w = canvas.width / rectSize;
let h = canvas.height / rectSize;
let matrices = new Array(2);
matrices[0] = new Array(w);
matrices[1] = new Array(w);
for (let x = 0; x < w; x++) {
matrices[0][x] = new Array(h);
matrices[1][x] = new Array(h);
for (let y = 0; y < h; y++) {
let el = {
h: 360 * (x / w),
s: "100%",
l: "50%",
};
matrices[0][x][y] = el;
matrices[1][x][y] = el;
}
}
// draw initial canvas, from here on out only individual rectangles will be
// filled as they get updated.
for (let x = 0; x < w; x++) {
for (let y = 0; y < h; y++) {
let el = matrices[0][x][y];
ctx.fillStyle = `hsl(${el.h}, ${el.s}, ${el.l})`;
ctx.fillRect(x * rectSize, y * rectSize, rectSize, rectSize);
}
}
let requestAnimationFrame =
window.requestAnimationFrame ||
window.mozRequestAnimationFrame ||
window.webkitRequestAnimationFrame ||
window.msRequestAnimationFrame;
let neighbors = [
[-1, -1], [0, -1], [1, -1],
[-1, 0], [1, 0],
[-1, 1], [0, 1], [1, 1],
];
function randNeighborAsteroid(matrix, x, y) {
let neighborCoord = neighbors[randn(neighbors.length)];
let neighborX = x+neighborCoord[0];
let neighborY = y+neighborCoord[1];
neighborX = (neighborX + w) % w;
neighborY = (neighborY + h) % h;
return matrix[neighborX][neighborY];
}
function randNeighbor(matrix, x, y) {
while (true) {
let neighborCoord = neighbors[randn(neighbors.length)];
let neighborX = x+neighborCoord[0];
let neighborY = y+neighborCoord[1];
if (neighborX < 0 || neighborX >= w || neighborY < 0 || neighborY >= h) {
continue;
}
return matrix[neighborX][neighborY];
}
}
let drift = 10;
function genChildH(elA, elB) {
// set the two h values, h1 <= h2
let h1 = elA.h;
let h2 = elB.h;
if (h1 > h2) {
h1 = elB.h;
h2 = elA.h;
}
// diff must be between 0 (inclusive) and 360 (exclusive). If it's greater
// than 180 then it's not the shortest path around, that must be the other
// way around the circle.
let hChild;
let diff = h2 - h1;
if (diff > 180) {
diff = 360 - diff;
hChild = h2 + (diff / 2);
} else {
hChild = h1 + (diff / 2);
}
hChild += (Math.random() * drift * 2) - drift;
hChild = (hChild + 360) % 360;
return hChild;
}
let tick = 0;
function doTick() {
tick++;
let currI = tick % 2;
let curr = matrices[currI];
let lastI = (tick - 1) % 2;
let last = matrices[lastI];
for (let i = 0; i < (w * h / 2); i++) {
let x = randn(w);
let y = randn(h);
if (curr[x][y].lastTick == tick) continue;
let neighbor = randNeighborAsteroid(last, x, y);
curr[x][y].h = genChildH(curr[x][y], neighbor);
curr[x][y].lastTick = tick;
ctx.fillStyle = `hsl(${curr[x][y].h}, ${curr[x][y].s}, ${curr[x][y].l})`;
ctx.fillRect(x * rectSize, y * rectSize, rectSize, rectSize);
}
matrices[currI] = curr;
requestAnimationFrame(doTick);
}
requestAnimationFrame(doTick);
</script>

View File

@ -1,353 +0,0 @@
---
title: >-
Component-Oriented Programming
description: >-
  A concise description of component-oriented programming.
tags: tech
---
[A previous post in this
blog](/2019/08/02/program-structure-and-composability.html) focused on a
framework developed to make designing component-based programs easier. In
retrospect, the proposed pattern/framework was over-engineered. This post
attempts to present the same ideas in a more distilled form, as a simple
programming pattern and without the unnecessary framework.
## Components
Many languages, libraries, and patterns make use of a concept called a
"component," but in each case the meaning of "component" might be slightly
different. Therefore, to begin talking about components, it is necessary to first
describe what is meant by "component" in this post.
For the purposes of this post, the properties of components include the
following.
&nbsp;1... **Abstract**: A component is an interface consisting of one or more
methods.
&nbsp;&nbsp;&nbsp;1a... A function might be considered a single-method component
_if_ the language supports first-class functions.
&nbsp;&nbsp;&nbsp;1b... A component, being an interface, may have one or more
implementations. Generally, there will be a primary implementation, which is
used during a program's runtime, and secondary "mock" implementations, which are
only used when testing other components.
&nbsp;2... **Instantiatable**: An instance of a component, given some set of
parameters, can be instantiated as a standalone entity. More than one of the
same component can be instantiated, as needed.
&nbsp;3... **Composable**: A component may be used as a parameter of another
component's instantiation. This would make it a child component of the one being
instantiated (the parent).
&nbsp;4... **Pure**: A component may not use mutable global variables (i.e.,
singletons) or impure global functions (e.g., system calls). It may only use
constants and variables/components given to it during instantiation.
&nbsp;5... **Ephemeral**: A component may have a specific method used to clean
up all resources that it's holding (e.g., network connections, file handles,
language-specific lightweight threads, etc.).
&nbsp;&nbsp;&nbsp;5a... This cleanup method should _not_ clean up any child
components given as instantiation parameters.
&nbsp;&nbsp;&nbsp;5b... This cleanup method should not return until the
component's cleanup is complete.
&nbsp;&nbsp;&nbsp;5c... A component should not be cleaned up until all its
parent components are cleaned up.
Components are composed together to create component-oriented programs. This is
done by passing components as parameters to other components during
instantiation. The `main` procedure of the program is responsible for
instantiating and composing the components of the program.
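To make the above a bit more concrete, here is a rough sketch of what these
properties can look like in Go. The names are purely illustrative; they are not
taken from the example implementation discussed below.
```go
package main

import "log"

// Store is a component: an abstract interface (property 1) with a cleanup
// method (property 5). The primary implementation might write to disk, while
// tests would substitute a mock.
type Store interface {
	Set(key, value string) error
	Close() error
}

// memStore is a trivial primary implementation of Store.
type memStore struct{ m map[string]string }

func newMemStore() *memStore                    { return &memStore{m: map[string]string{}} }
func (s *memStore) Set(key, value string) error { s.m[key] = value; return nil }
func (s *memStore) Close() error                { return nil }

// Worker is another component. It is instantiated with its child components
// as parameters (properties 2 and 3), and it uses no global state (property 4).
type Worker struct {
	store  Store
	logger *log.Logger
}

func NewWorker(store Store, logger *log.Logger) *Worker {
	return &Worker{store: store, logger: logger}
}

func (w *Worker) Do(key string) error {
	w.logger.Printf("storing %q", key)
	return w.store.Set(key, "done")
}

// main instantiates and composes the components.
func main() {
	logger := log.Default()
	store := newMemStore()
	defer store.Close() // property 5: release resources during cleanup

	worker := NewWorker(store, logger)
	worker.Do("example")
}
```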
## Example
It's easier to show than to tell. This section posits a simple program and then
describes how it would be implemented in a component-oriented way. The program
chooses a random number and exposes an HTTP interface that allows users to try
and guess that number. The following are requirements of the program:
* A guess consists of a name that identifies the user performing the guess and
the number that is being guessed;
* A score is kept for each user who has performed a guess;
* Upon an incorrect guess, the user should be informed of whether they guessed
too high or too low, and 1 point should be deducted from their score;
* Upon a correct guess, the program should pick a new random number against
which to check subsequent guesses, and 1000 points should be added to the
user's score;
* The HTTP interface should have two endpoints: one for users to submit guesses,
and another that lists out user scores from highest to lowest;
* Scores should be saved to disk so they survive program restarts.
It seems clear that there will be two major areas of functionality for our
program: score-keeping and user interaction via HTTP. Each of these can be
encapsulated into components called `scoreboard` and `httpHandlers`,
respectively.
`scoreboard` will need to interact with a filesystem component to save/restore
scores (because it can't use system calls directly; see property 4). It would be
wasteful for `scoreboard` to save the scores to disk on every score update, so
instead it will do so every 5 seconds. A time component will be required to
support this.
`httpHandlers` will be choosing the random number which is being guessed, and
will therefore need a component that produces random numbers. `httpHandlers`
will also be recording score changes to `scoreboard`, so it will need access to
`scoreboard`.
The example implementation will be written in go, which makes differentiating
HTTP handler functionality from the actual HTTP server quite easy; thus, there
will be an `httpServer` component that uses `httpHandlers`.
Finally, a `logger` component will be used in various places to log useful
information during runtime.
[The example implementation can be found
here.](/assets/component-oriented-design/v1/main.html) While most of it can be
skimmed, it is recommended to at least read through the `main` function to see
how components are composed together. Note that `main` is where all components
are instantiated, and that all components take in their child components as
part of their instantiation.
## DAG
One way to look at a component-oriented program is as a directed acyclic graph
(DAG), where each node in the graph represents a component, and each edge
indicates that one component depends upon another component for instantiation.
For the previous program, it's quite easy to construct such a DAG just by
looking at `main`, as in the following:
```
net.Listener rand.Rand os.File
^ ^ ^
| | |
httpServer --> httpHandlers --> scoreboard --> time.Ticker
| | |
+---------------+---------------+--> log.Logger
```
Note that all the leaves of the DAG (i.e., nodes with no children) describe the
points where the program meets the operating system via system calls. The leaves
are, in essence, the program's interface with the outside world.
While it's not necessary to actually draw out the DAG for every program one
writes, it can be helpful to at least think about the program's structure in
these terms.
## Benefits
Looking at the previous example implementation, one would be forgiven for having
the immediate reaction of "This seems like a lot of extra work for little gain.
Why can't I just make the system calls where I need to, and not bother with
wrapping them in interfaces and all these other rules?"
The following sections will answer that concern by showing the benefits gained
by following a component-oriented pattern.
### Testing
Testing is important, that much is being assumed.
A distinction to be made with testing is between unit and non-unit tests. Unit
tests are those for which there are no requirements for the environment outside
the test, such as the existence of global variables, running databases,
filesystems, or network services. Unit tests do not interact with the world
outside the testing procedure, but instead use mocks in place of the
functionality that would be expected by that world.
Unit tests are important because they are faster to run and more consistent than
non-unit tests. Unit tests also force the programmer to consider different
possible states of a component's dependencies during the mocking process.
Unit tests are often not employed by programmers, because they are difficult to
implement for code that does not expose any way to swap out dependencies for
mocks of those dependencies. The primary culprit of this difficulty is the
direct usage of singletons and impure global functions. For component-oriented
programs, all components inherently allow for the swapping out of any
dependencies via their instantiation parameters, so there's no extra effort
needed to support unit tests.
[Tests for the example implementation can be found
here.](/assets/component-oriented-design/v1/main_test.html) Note that all
dependencies of each component being tested are mocked/stubbed next to them.
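Building on the sketch from earlier (again, the names are my own and not from
the linked tests), a unit test can simply swap a stub in for the `Store`
dependency:
```go
package main

import (
	"io"
	"log"
	"testing"
)

// stubStore stands in for the real Store. It records writes in memory, so the
// test needs no filesystem or any other outside state.
type stubStore struct{ sets map[string]string }

func (s *stubStore) Set(key, value string) error { s.sets[key] = value; return nil }
func (s *stubStore) Close() error                { return nil }

func TestWorkerDo(t *testing.T) {
	store := &stubStore{sets: map[string]string{}}
	logger := log.New(io.Discard, "", 0) // silence log output during the test

	w := NewWorker(store, logger)
	if err := w.Do("foo"); err != nil {
		t.Fatal(err)
	}
	if got := store.sets["foo"]; got != "done" {
		t.Fatalf(`expected "done" to be stored under "foo", got %q`, got)
	}
}
```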
### Configuration
Practically all programs require some level of runtime configuration. This may
take the form of command-line arguments, environment variables, configuration
files, etc.
For a component-oriented program, all components are instantiated in the same
place, `main`, so it's very easy to expose any arbitrary parameter to the user
via configuration. For any component that is affected by a configurable
parameter, that component merely needs to take an instantiation parameter for
that configurable parameter; `main` can connect the two together. This makes it
possible to unit test a component with different configurations, while still
allowing any arbitrary internal functionality to be configured.
For more complex configuration systems, it is also possible to implement a
`configuration` component that wraps whatever configuration-related
functionality is needed, which other components use as a sub-component. The
effect is the same.
To demonstrate how configuration works in a component-oriented program, the
example program's requirements will be augmented to include the following:
* The point change values for both correct and incorrect guesses (currently
hardcoded at 1000 and 1, respectively) should be configurable on the
command-line;
* The save file's path, HTTP listen address, and save interval should all be
configurable on the command-line.
[The new implementation, with newly configurable parameters, can be found
here.](/assets/component-oriented-design/v2/main.html) Most of the program has
remained the same, and all unit tests from before remain valid. The primary
difference is that `scoreboard` takes in two new parameters for the point change
values, and configuration is set up inside `main` using the `flag` package.
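As a rough sketch of what that wiring can look like (the flag names here are
invented for illustration, not those of the linked implementation):
```go
package main

import (
	"flag"
	"log"
	"time"
)

func main() {
	// Each configurable value becomes a flag, parsed once in main...
	correctDelta := flag.Int("correct-delta", 1000, "points added on a correct guess")
	incorrectDelta := flag.Int("incorrect-delta", 1, "points deducted on an incorrect guess")
	saveFile := flag.String("save-file", "scores.json", "path of the score save file")
	saveInterval := flag.Duration("save-interval", 5*time.Second, "how often scores are saved")
	listenAddr := flag.String("listen-addr", ":8000", "address for the HTTP server to listen on")
	flag.Parse()

	// ...and is then handed to the relevant component as an instantiation
	// parameter. The actual constructors are elided in this sketch.
	log.Printf("would instantiate scoreboard with deltas %d/%d, file %q, interval %s",
		*correctDelta, *incorrectDelta, *saveFile, *saveInterval)
	log.Printf("would instantiate httpServer listening on %q", *listenAddr)
}
```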
### Setup/Runtime/Cleanup
A program can be split into three stages: setup, runtime, and cleanup. Setup is
the stage during which the internal state is assembled to make runtime possible.
Runtime is the stage during which a program's actual function is being
performed. Cleanup is the stage during which the runtime stops and internal
state is disassembled.
A graceful (i.e., reliably correct) setup is quite natural for most programmers
to accomplish. On the other hand, a graceful cleanup is, unfortunately, not a
programmer's first concern (if it is a concern at all).
When building reliable and correct programs, a graceful cleanup is as important
as a graceful setup and runtime. A program is still running while it is being
cleaned up, and it's possibly still acting on the outside world. Shouldn't
it behave correctly during that time?
Achieving a graceful setup and cleanup with components is quite simple.
During setup, a single-threaded procedure (`main`) first constructs the leaf
components, then the components that take those leaves as parameters, then the
components that take _those_ as parameters, and so on, until the component DAG
is fully constructed.
At this point, the program's runtime has begun.
Once the runtime is over, signified by a process signal or some other mechanism,
it's only necessary to call each component's cleanup method (if any; see
property 5) in the reverse of the order in which the components were
instantiated. This order is inherently deterministic, as the components were
instantiated by a single-threaded procedure.
Inherent to this pattern is the fact that each component will certainly be
cleaned up before any of its child components, as its child components must have
been instantiated first, and a component will not clean up child components
given as parameters (properties 5a and 5c). Therefore, the pattern avoids
use-after-cleanup situations.
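Sketched out in Go (with stand-in components and no real resources to release),
that ordering might look like this:
```go
package main

import "log"

// closer is the shape of the cleanup method shared by the stand-in components.
type closer interface{ Close() error }

type store struct{}

func newStore() *store      { return &store{} }
func (*store) Close() error { return nil }

type board struct{ store *store }

func newBoard(s *store) *board { return &board{store: s} }
func (*board) Close() error    { return nil }

func main() {
	logger := log.Default()

	// Setup: leaves first, then their parents, recording each cleanup method
	// in instantiation order.
	var cleanups []closer

	st := newStore()
	cleanups = append(cleanups, st)

	b := newBoard(st)
	cleanups = append(cleanups, b)

	// ... runtime happens here ...

	// Cleanup: walk the list in reverse, so every component is cleaned up
	// before any of its children are.
	for i := len(cleanups) - 1; i >= 0; i-- {
		if err := cleanups[i].Close(); err != nil {
			logger.Printf("during cleanup: %v", err)
		}
	}
}
```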
To demonstrate a graceful cleanup in a component-oriented program, the example
program's requirements will be augmented to include the following:
* The program will terminate itself upon an interrupt signal;
* During termination (cleanup), the program will save the latest set of scores
to disk one final time.
[The new implementation that accounts for these new requirements can be found
here.](/assets/component-oriented-design/v3/main.html) For this example, go's
`defer` feature could have been used instead, which would have been even
cleaner, but was omitted for the sake of those using other languages.
## Conclusion
The component pattern helps make programs more reliable with only a small amount
of extra effort incurred. In fact, most of the pattern has to do with
establishing sensible abstractions around global functionality and remembering
certain idioms for how those abstractions should be composed together, something
most of us already do to some extent anyway.
While beneficial in many ways, component-oriented programming is merely a tool
that can be applied in many cases. It is certain that there are cases where it
is not the right tool for the job, so apply it deliberately and intelligently.
## Criticisms/Questions
In lieu of a FAQ, I will attempt to premeditate questions and criticisms of the
component-oriented programming pattern laid out in this post.
**This seems like a lot of extra work.**
Building reliable programs is a lot of work, just as building a
reliable _anything_ is a lot of work. Many of us work in an industry that likes
to balance reliability (sometimes referred to by the more specious "quality")
with malleability and deliverability, which naturally leads to skepticism of any
suggestions requiring more time spent on reliability. This is not necessarily a
bad thing; it's just how the industry functions.
All that said, a pattern need not be followed perfectly to be worthwhile, and
the amount of extra work incurred by it can be decided based on practical
considerations. I merely maintain that code which is (mostly) component-oriented
is easier to maintain in the long run, even if it might be harder to get off the
ground initially.
**My language makes this difficult.**
I don't know of any language which makes this pattern particularly easier than
others, so, unfortunately, we're all in the same boat to some extent (though I
recognize that some languages, or their ecosystems, make it more difficult than
others). It seems to me that this pattern shouldn't be unbearably difficult for
anyone to implement in any language either, however, as the only language
feature required is abstract typing.
It would be nice to one day see a language that explicitly supports this
pattern by baking the component properties in as compiler-checked rules.
**My `main` is too big.**
There's no law saying all component construction needs to happen in `main`,
that's just the most sensible place for it. If there are large sections of your
program that are independent of each other, then they could each have their own
construction functions that `main` then calls.
Other questions that are worth asking include: Can my program be split up
into multiple programs? Can the responsibilities of any of my components be
refactored to reduce the overall complexity of the component DAG? Can the
instantiation of any components be moved within their parent's
instantiation function?
(This last suggestion may seem to be disallowed, but is fine as long as the
parent's instantiation function remains pure.)
**Won't this result in over-abstraction?**
Abstraction is a necessary tool in a programmer's toolkit; there is simply no
way around it. The only questions are "how much?" and "where?"
The use of this pattern does not affect how those questions are answered, in my
opinion, but instead aims to more clearly delineate the relationships and
interactions between the different abstracted types once they've been
established using other methods. Over-abstraction is possible and avoidable
regardless of which language, pattern, or framework is being used.
**Does CoP conflict with object-oriented or functional programming?**
I don't think so. OoP languages will have abstract types as part of their core
feature-set; most difficulties are going to be with deliberately _not_ using
other features of an OoP language, and with imported libraries in the language
perhaps making life inconvenient by not following CoP (specifically regarding
cleanup and the use of singletons).
For functional programming, it may well be that, depending on the language, CoP
is technically being used, as functional languages are already generally
antagonistic toward globals and impure functions, which is most of the battle.
If anything, the transition from functional to component-oriented programming
will generally be an organizational task.

View File

@ -1,50 +0,0 @@
---
title: >-
New Year, New Resolution
description: >-
This blog is about to get some action.
---
At this point I'm fairly well known amongst friends and family for my new year's
resolutions, to the point that earlier this month a friend of mine asked me
"What's it going to be this year?". In the past I've done things like no
chocoloate, no fast food, no added sugar (see a theme?), and no social media.
They've all been of the "I won't do this" sort, because it's a lot easier to
stop doing something than to start doing something new. Doing something new
inherently means _also_ not doing something else; there are only so many hours
in the day, after all.
## This Year
This year I'm going to shake things up, I'm going to do something new. My
resolution is to have published 52 posts on this blog by Jan 1, 2022, 00:00 UTC.
Only one post per day can count towards the 52. A post must be "substantial" to
count towards the 52. A non-substantial post would be something like the 100
word essay about my weekend that I wrote in first grade, which went something
like "My weekend was really really really ('really' 96 more times) really really
boring".
Other than that, it's pretty open-ended.
## Why
My hope is that I'll get more efficient at writing these things. Usually I take
a lot of time to craft a post, weeks in some cases. I really appreciate those of
you that have taken the time to read them, but to be frank the time commitment
just isn't worth it. With practice I can hopefully learn what exactly I have to
say that others are interested in, and then go back to spending a lot of time
crafting the things being said.
Another part of this is going to be learning how to market myself properly,
something I've always been reticent to do. Our world is filled with people
shouting into the void of the internet, each with their own reasons for wanting
to be heard. Does it need another? Probably not. But here I am. I guess what I'm
really going to be doing is learning _why_ I want to do this; I know I want to
have others read what I write, but is it possible that that desire isn't
entirely selfish? Is it ok if it is?
Once I'm comfortable with why I'm doing this it will, hopefully, be easier to
figure out a marketing avenue I feel comfortable with putting a lot of energy
towards. There must be at least _one_...
So consider this #1, world. Only 51 to go.

View File

@ -1,354 +0,0 @@
---
title: >-
Ginger
description: >-
Yes, it does exist.
series: ginger
tags: tech
---
This post is about a programming language that's been bouncing around in my head
for a _long_ time. I've tried to actually implement the language three or more
times now, but every time I get stuck or run out of steam. It doesn't help that
every time I try again the form of the language changes significantly. But all
throughout the name of the language has always been "Ginger". It's a good name.
In the last few years the form of the language has somewhat solidified in my
head, so in lieu of actually working on it I'm going to talk about what it
currently looks like.
## Abstract Syntax Lists
_In the beginning_ there was assembly. Well, really in the beginning there were
punchcards, and probably something even more esoteric before that, but it was
all effectively the same thing: a list of commands the computer would execute
sequentially, with the ability to jump to odd places in the sequence depending
on conditions at runtime. For the purpose of this post, we'll call this class of
languages "abstract syntax list" (ASL) languages.
Here's a hello world program in my favorite ASL language, brainfuck:
```
++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.++
+.------.--------.>>+.>++.
```
(If you've never seen brainfuck, it's deliberately unintelligible. But it _is_
an ASL, each character representing a single command, executed by the brainfuck
runtime from left to right.)
ASLs did the job at the time, but luckily we've mostly moved on past them.
## Abstract Syntax Trees
Eventually programmers upgraded to C-like languages. Rather than a sequence of
commands, these languages were syntactically represented by an "abstract syntax
tree" (AST). Rather than executing commands in essentially the same order they
are written, an AST language compiler reads the syntax into a tree of syntax
nodes. What it then does with the tree is language dependent.
Here's a program which outputs all numbers from 0 to 9 to stdout, written in
(slightly non-idiomatic) Go:
```go
i := 0
for {
if i == 10 {
break
}
fmt.Println(i)
i++
}
```
When the Go compiler sees this, it's going to first parse the syntax into an
AST. The AST might look something like this:
```
(root)
|-(:=)
| |-(i)
| |-(0)
|
|-(for)
|-(if)
| |-(==)
| | |-(i)
| | |-(10)
| |
| |-(break)
|
|-(fmt.Println)
| |-(i)
|
|-(++)
|-(i)
```
Each of the non-leaf nodes in the tree represents an operation, and the children
of the node represent the arguments to that operation, if any. From here the
compiler traverses the tree depth-first in order to turn each operation it finds
into the appropriate machine code.
There's a sub-class of AST languages called the LISP ("LISt Processor")
languages. In a LISP language the AST is represented using lists of elements,
where the first element in each list denotes the operation and the rest of the
elements in the list (if any) represent the arguments. Traditionally each list
is represented using parenthesis. For example `(+ 1 1)` represents adding 1 and
1 together.
As a more complex example, here's how to print numbers 0 through 9 to stdout
using my favorite (and, honestly, only) LISP, Clojure:
```clj
(doseq
[n (range 10)]
(println n))
```
Much smaller, but the idea is there. In LISPs there is no differentiation
between the syntax, the AST, and the language's data structures; they are all
one and the same. For this reason LISPs generally have very powerful macro
support, wherein one uses code written in the language to transform code written
in that same language. With macros users can extend a language's functionality
to support nearly anything they need to, but because macro generation happens
_before_ compilation they can still reap the benefits of compiler optimizations.
### AST Pitfalls
The ASL (assembly) is essentially just a thin layer of human readability on top
of raw CPU instructions. It does nothing in the way of representing code in the
way that humans actually think about it (relationships of types, flow of data,
encapsulation of behavior). The AST is a step towards expressing code in human
terms, but it isn't quite there in my opinion. Let me show why by revisiting the
Go example above:
```go
i := 0
for {
if i > 9 {
break
}
fmt.Println(i)
i++
}
```
When I understand this code I don't understand it in terms of its syntax. I
understand it in terms of what it _does_. And what it does is this:
* with a number starting at 0, start a loop.
* if the number is greater than 9, stop the loop.
* otherwise, print the number.
* add one to the number.
* go to start of loop.
This behavior could be further abstracted into the original problem statement,
"it prints numbers 0 through 9 to stdout", but that's too general, as there
are different ways for that to be accomplished. The Clojure example first
defines a list of numbers 0 through 9 and then iterates over that, rather than
looping over a single number. These differences are important when understanding
what code is doing.
So what's the problem? My problem with ASTs is that the syntax I've written down
does _not_ reflect the structure of the code or the flow of data which is in my
head. In the AST representation if you want to follow the flow of data (a single
number) you _have_ to understand the semantic meaning of `i` and `:=`; the AST
structure itself does not convey how data is being moved or modified.
Essentially, there's an extra implicit transformation that must be done to
understand the code in human terms.
## Ginger: An Abstract Syntax Graph Language
In my view the next step is towards using graphs rather than trees for
representing our code. A graph has the benefit of being able to reference
"backwards" into itself, where a tree cannot, and so can represent the flow of
data much more directly.
I would like Ginger to be an ASG language where the language is the graph,
similar to a LISP. But what does this look like exactly? Well, I have a good
idea about what the graph _structure_ will be like and how it will function, but
the syntax is something I haven't bothered much with yet. Representing graph
structures in a text file is a problem to be tackled all on its own. For this
post we'll use a made-up, overly verbose, and probably non-usable syntax, but
hopefully it will convey the graph structure well enough.
### Nodes, Edges, and Tuples
All graphs have nodes, where each node contains a value. A single unique value
can only have a single node in a graph. Nodes are connected by edges, where
edges have a direction and can contain a value themselves.
In the context of Ginger, a node represents a value as expected, and the value
on an edge represents an operation to take on that value. For example:
```
5 -incr-> n
```
`5` and `n` are both nodes in the graph, with an edge going from `5` to `n` that
has the value `incr`. When it comes time to interpret the graph we say that the
value of `n` can be calculated by giving `5` as the input to the operation
`incr` (increment). In other words, the value of `n` is `6`.
What about operations which have more than one input value? For this Ginger
introduces the tuple to its graph type. A tuple is like a node, except that it's
anonymous, which allows more than one to exist within the same graph, as they do
not share the same value. For the purposes of this blog post we'll represent
tuples like this:
```
1 -> } -add-> t
2 -> }
```
`t`'s value is the result of passing a tuple of two values, `1` and `2`, as
inputs to the operation `add`. In other words, the value of `t` is `3`.
For the syntax being described in this post we allow that a single contiguous
graph can be represented as multiple related sections. This can be done because
each node's value is unique, so when the same value is used in disparate
sections we can merge the two sections on that value. For example, the following
two graphs are exactly equivalent (note the parenthesis wrapping the graph which
has been split):
```
1 -> } -add-> t -incr-> tt
2 -> }
```
```
(
1 -> } -add-> t
2 -> }
t -incr-> tt
)
```
(`tt` is `4` in both cases.)
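As an aside, this structure is concrete enough to play with in code. Here's a
rough Go sketch, entirely my own illustration and not Ginger's actual graph
type, which models the `t`/`tt` graph above and evaluates it:
```go
package main

import "fmt"

// node is a value in the graph. A literal has no operation; any other node is
// the result of applying an operation to the values on its input edges (a
// multi-input node standing in for a tuple).
type node struct {
	op     string  // "" for a literal
	lit    int     // literal value when op == ""
	inputs []*node // values flowing in along edges
}

func lit(v int) *node                   { return &node{lit: v} }
func op(name string, in ...*node) *node { return &node{op: name, inputs: in} }

// eval walks the graph depth-first, applying each operation to its inputs.
func eval(n *node) int {
	switch n.op {
	case "":
		return n.lit
	case "incr":
		return eval(n.inputs[0]) + 1
	case "add":
		return eval(n.inputs[0]) + eval(n.inputs[1])
	default:
		panic("unknown operation: " + n.op)
	}
}

func main() {
	// 1 -> } -add-> t -incr-> tt
	// 2 -> }
	t := op("add", lit(1), lit(2))
	tt := op("incr", t)
	fmt.Println(eval(t), eval(tt)) // prints: 3 4
}
```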
A tuple with only one input edge, a 1-tuple, is a no-op, semantically, but can
be useful structurally to chain multiple operations together without defining
new value names. In the above example the `t` value can be eliminated using a
1-tuple.
```
1 -> } -add-> } -incr-> tt
2 -> }
```
When an integer is used as an operation on a tuple value then the effect is to
output the value in the tuple at that index. For example:
```
1 -> } -0-> } -incr-> t
2 -> }
```
(`t` is `2`.)
### Operations
When a value sits on an edge it is used as an operation on the input of that
edge. Some operations will no doubt be builtin, like `add`, but users should be
able to define their own operations. This can be done using the `in` and `out`
special values. When a graph is used as an operation it is scanned for both `in`
and `out` values. `in` is set to the input value of the operation, and the value
of `out` is used as the output of the operation.
Here we will define the `incr` operation and then use it. Note that we set the
`incr` value to be an entire sub-graph which represents the operation's body.
```
( in -> } -add-> out
1 -> } ) -> incr
5 -incr-> n
```
(`n` is `6`.)
The output of an operation may itself be a tuple. Here's an implementation and
usage of `double-incr`, which increments two values at once.
```
( in -0-> } -incr-> } -> out
}
in -1-> } -incr-> } ) -> double-incr
1 -> } -double-incr-> t -add-> tt
2 -> }
```
(`t` is a 2-tuple with values `2` and `3`; `tt` is `5`.)
### Conditionals
The conditional is a bit weird, and I'm not totally settled on it yet. For now
we'll use this. The `if` operation expects as an input a 2-tuple whose first
value is a boolean and whose second value will be passed along. The `if`
operation is special in that it has _two_ output edges. The first will be taken
if the boolean is true, the second if the boolean is false. The second value in
the input tuple, the one to be passed along, is used as the input to whichever
branch is taken.
Here is an implementation and usage of `max`, which takes two numbers and
outputs the greater of the two. Note that the `if` operation has two output
edges, but our syntax doesn't represent that very cleanly.
```
( in -gt-> } -if-> } -0-> out
in -> } -> } -1-> out ) -> max
1 -> } -max-> t
2 -> }
```
(`t` is `2`.)
It would be simple enough to create a `switch` macro on top of `if`, to allow
for multiple conditionals to be tested at once.
### Loops
Loops are tricky, and I have two thoughts about how they might be accomplished.
One is to literally draw an edge from the right end of the graph back to the
left, at the point where the loop should occur, as that's conceptually what's
happening. But representing that in a text file is difficult. For now I'll
introduce the special `recur` value, and leave this whole section as TBD.
`recur` is a cousin of `in` and `out`, in that it's a special value and not an
operation. It takes whatever value it's set to and calls the current operation
with that as input. As an example, here is our now classic 0 through 9 printer
(assume `println` prints its input and outputs it unchanged):
```
// incr-1 is an operation which takes a 2-tuple and returns the same 2-tuple
// with the first element incremented.
( in -0-> } -incr-> } -> out
in -1-> } ) -> incr-1
( in -eq-> } -if-> out
in -> } -> } -0-> } -println-> } -incr-1-> } -> recur ) -> print-range
0 -> } -print-range-> }
10 -> }
```
## Next Steps
This post is long enough, and I think gives at least a basic idea of what I'm
going for. The syntax presented here is _extremely_ rudimentary, and is almost
definitely not what any final version of the syntax would look like. But the
general idea behind the structure is sound, I think.
I have a lot of further ideas for Ginger I haven't presented here. Hopefully as
time goes on and I work on the language more some of those ideas can start
taking a more concrete shape and I can write about them.
The next thing I need to do for Ginger is to implement (again) the graph type
for it, since the last one I implemented didn't include tuples. Maybe I can
extend it instead of re-writing it. After that it will be time to really buckle
down and figure out a syntax. Once a syntax is established then it's time to
start on the compiler!

View File

@ -1,241 +0,0 @@
---
title: >-
The Web
description: >-
What is it good for?
series: nebula
tags: tech
---
With the recent crisis in the US's democratic process, there's been much abuzz
in the world about social media's undoubted role in the whole debacle. The
extent to which the algorithms of Facebook, Twitter, Youtube, TikTok, etc, have
played a role in the radicalization of large segments of the world's population
is one popular topic. Another is the tactics those same companies are now
employing to try and euthanize the monster they made so much ad money in
creating.
I don't want to talk about any of that; there is more to the web than
social media. I want to talk about what the web could be, and to do that I want
to first talk about what it has been.
## Web 1.0
In the 1950s computers were generally owned by large organizations like
companies, universities, and governments. They were used to compute and manage
large amounts of data, and each existed independently of the other.
In the 60s protocols began to be developed which would allow them to
communicate over large distances, and thereby share resources (both
computational and informational).
The funding of ARPANET by the US DoD led to the initial versions of the TCP/IP
protocol in the 70s, still used today as the backbone of virtually all internet
communication. Email also came about from ARPANET around this time.
The 80s saw the growth of the internet across the world, as ARPANET gave way to
NSFNET. It was during this time that the domain name system we use today was
developed. At this point the internet use was still mostly for large
non-commercial organizations; there was little commercial footprint, and little
private access. The first commercially available ISP, which allowed access to
the internet from private homes via dialup, wasn't launched until 1989.
And so we find ourselves in the year 1989, when Tim Berners-Lee (TBL) first
proposed the World-Wide Web (WWW, or "the web"). You can find the original
proposal, which is surprisingly short and non-technical,
[here](https://www.w3.org/Proposal.html).
From reading TBL's proposal it's clear that what he was after was some mechanism
for hosting information on his machine in such a way that others could find and
view it, without it needing to be explicitly sent to them. He includes the
following under the "Applications" header:
> The application of a universal hypertext system, once in place, will cover
> many areas such as document registration, on-line help, project documentation,
> news schemes and so on.
But out of such a humble scope grew one of the most powerful forces of the 21st
century. By the end of 1990 TBL had written the first HTML/HTTP browser and
server. By the end of 1994 sites like IMDB, Yahoo, and Bianca's Smut Shack were
live and being accessed by consumers. The web grew that fast.
In my view the characteristic of the web which catalyzed its adoption so quickly
was the place-ness of it. The web is not just a protocol for transferring
information, like email, but instead is a _place_ where that information lives.
Any one place could be freely linked to any other place, and so complex and
interesting relations could be formed between people and ideas. The
contributions people make on the web can reverberate farther than they would or
could in any other medium precisely because those contributions aren't tied to
some one-off event or a deteriorating piece of physical infrastructure, but are
instead given a home which is both permanent and everywhere.
The other advantage of the web, at the time, was its simplicity. HTML was so
simple it was basically human-readable. A basic HTTP server could be implemented
as a hobby project by anyone in any language. Hosting your own website was a
relatively straightforward task which anyone with a computer and an ISP could
undertake.
This was the environment early adopters of the web found themselves in.
## Web 2.0
The infamous dot-com bust took place in 2001. I don't believe this was a failure
inherent in the principles of the web itself, but instead was a product of
people investing in a technology they didn't fully understand. The web, as it
was then, wasn't really designed with money-making in mind. It certainly allowed
for it, but that wasn't the use-case being addressed.
But of course, in this world we live in, if there's money to be made, it will
certainly be made.
By 2003 the phrase "Web 2.0" started popping up. I remember this. To me "Web
2.0" meant a new aesthetic on the web, complete with bubble buttons and centered
fixed-width paragraph boxes. But what "Web 2.0" actually signified wasn't related
to any new technology or aesthetic. It was a new strategy for how companies
could enable use of the web by non-expert users, i.e. users who don't have the
inclination or means to host their own website. Web 2.0 was a strategy for
giving everyone a _place_ of their own on the web.
"Web 2.0" was merely a label given to a movement which had already been in
motion for years. I think the following Wikipedia excerpt describes this period
best:
> In 2004, the term ["Web 2.0"] began its rise in popularity when O'Reilly Media
and MediaLive hosted the first Web 2.0 conference. In their opening remarks,
John Battelle and Tim O'Reilly outlined their definition of the "Web as
Platform", where software applications are built upon the Web as opposed to upon
the desktop. The unique aspect of this migration, they argued, is that
"customers are building your business for you". They argued that the
activities of users generating content (in the form of ideas, text, videos, or
pictures) could be "harnessed" to create value.
In other words, Web 2.0 turned the place-ness of the web into a commodity.
Rather than expect everyone to host, or arrange for the hosting, of their own
corner of the web, the technologists would do it for them for "free"! This
coincided with the increasing complexity of the underlying technology of the
web; websites grew to be flashy, interactive, and stateful applications which
_did_ things rather than be places which _held_ things. The idea of a hyperlink,
upon which the success of the web had been founded, became merely an
implementation detail.
And so the walled gardens began to be built. Myspace was founded in 2003,
Facebook opened to the public in 2006, Digg (the precursor to reddit) was
launched in 2004, Flickr launched in 2004 (and was bought by Yahoo in 2005),
Google bought Blogger in 2003, and Twitter launched in 2006. In effect this
period both opened the web up to everyone and established the way we still use
it today.
It's upon these foundations that current events unfold. We have platforms whose
only incentive is towards capturing new users and holding their attention, to
the exclusion of other platforms, so they can be advertised to. Users are
enticed in because they are being offered a place on the web, a place of their
own to express themselves from, in order to find out the worth of their
expressions to the rest of the world. But they aren't expressing to the world at
large, they are expressing to a social media platform, a business, and so only
the most lucrative of voices are heard.
So much for not wanting to talk about social media.
## Web 3.0
The new hot topic in crypto and hacker circles is "Web 3.0", or the
decentralized web (dweb). The idea is that we can have all the good of the
current web (the accessibility, utility, permanency, etc) without all the bad
(the centralized platforms, censorship, advertising, etc). The way forward to
this utopian dream is by building decentralized applications (dApps).
dApps are constructed in a way where all the users of an application help to
host all the stateful content of that application. If I, as a user, post an
image to a dApp, the idea is that other users of that same dApp would lend their
meager computer resources to ensure my image is never forgotten, and in turn I
would lend mine for theirs.
In practice building successful dApps is enormously difficult for many reasons,
and really I'm not sure there _are_ any successful ones (to date). While I
support the general sentiment behind them, I sometimes wonder about the
efficacy. What people want from the web is a place they can call their own, a
place from which they can express themselves and share their contributions with
others with all the speed and pervasiveness that the internet offers. A dApp is
just another walled garden with specific capabilities; it offers only free
hosting, not free expression.
## Web 2.0b
I'm not here solely to complain (just mostly).
Thinking back to Web 1.0, and specifically to the turning point between 1.0 and
2.0, I'd like to propose that maybe we made a wrong turn. The issue at hand was
that hosting one's own site was still too much of a technical burden, and the
direction we went was towards having businesses host them for us. Perhaps there
was another way.
What are the specific difficulties with hosting one's own site? Here are the
ones I can think of:
* Bad tooling: basically none of the tools you're required to use (web server,
TLS, DNS, your home router) are designed for the average person.
* Egregiously complex languages: making a site which looks half decent and can
do the things you want requires a _lot_ of knowledge about the underlying
languages (CSS, HTML, Javascript, and whatever your server is written in).
* Single point-of-failure: if your machine is off, your site is down.
* Security: it's important to stay ahead of the hackers, but it takes time to
do so.
* Hostile environment: this is separate from security, and includes difficulties
like dynamic home IPs and bad ISP policies (such as asymmetric upload/download
speeds).
These are each separate avenues of attack.
Bad tooling is a result of the fact that devs generally build technology for
themselves or their fellow devs, and only build for others when they're being
paid to do it. This is merely an attitude problem.
Complex languages are really a sub-category of bad tooling. The consensus seems
to be that the average person isn't interested in or capable of working with
HTML/CSS/JS. This may be true today, but it wasn't always. Most of my friends in
middle and high school were well within their interest and capability to create
the most heinous MySpace pages the world has ever seen, using nothing but CSS
generators and scraps of shitty JS they found lying around. So what changed? The
tools we use to build those pages did.
A hostile environment is not something any individual can do anything about, but
in the capitalist system we exist in we can at least hold in faith the idea that
eventually us customers will get what we want. It may take a long time, but all
monopolies break eventually, and someone will eventually sell us the internet
access we're asking for. If all other pieces are in place I think we'll have
enough people asking to make a difference.
For single point-of-failure we have to grant that more than one person will be
involved, since the vast majority of people aren't going to be able to keep one
machine online consistently, let alone two or more machines. But I think we all
know at least one person who could keep a machine online with some reliability,
and they probably know a couple of other people who could do so as well. What
I'm proposing is that, rather than building tools for global decentralization,
we need tools for local decentralization, aka federation. We can make it
possible for a group of people to have their presence managed by a subset of
themselves. Those with the ability could help to host the online presence of
their family, friends, churches, etc, if given the right tools.
Security is the hard one, but also in many ways isn't. What most people want
from the web is a place from which to express themselves. Expression doesn't
take much more than a static page, usually, and there's not much attacking one
can do against a static page. Additionally, we've already established that
there's going to be at least a _couple_ of technically minded people involved in
hosting this thing.
So that's my idea that I'd like to build towards. First among these ideas is
that we need tools which can help people help each other host their content, and
on top of that foundation a new web can be built which values honest expression
rather than the lucrative madness which our current algorithms love so much.
This project was already somewhat started by
[Cryptorado](https://github.com/Cryptorado-Community/Cryptorado-Node) while I
was a regular attendee, but since COVID started my attendance has fallen off.
Hopefully one day it can resume. In the meantime I'm going to be working on
setting up these tools for myself, and see how far I can get.

View File

@ -1,247 +0,0 @@
---
title: >-
Goodbye, Github Pages
description: >-
This blog is no longer sponsored by Microsoft!
tags: tech
series: selfhost
---
Slowly but surely I'm working on moving my digital life back to being
self-hosted, and this blog was an easy low-hanging fruit to tackle. Previously
the blog was hosted on Github Pages, which was easy enough but also in many ways
restricting. By self-hosting I'm able to have a lot more control over the
generation, delivery, and functionality of the blog.
For reference you can find the source code for the blog at
[{{site.repository}}]({{site.repository}}). Yes, it will one day be hosted
elsewhere as well.
## Nix
Nix is something I'm slowly picking up, but the more I use it the more it grows
on me. Rather than littering my system with ruby versions and packages I'll
never otherwise use, nix allows me to create a sandboxed build pipeline for the
blog with perfectly reproducible results.
The first step in this process is to take the blog's existing `Gemfile.lock` and
turn it into a `gemset.nix` file, which is essentially a translation of the
`Gemfile.lock` into a file nix can understand. There's a tool called
[bundix][bundix] which does this, and it can be used from a nix shell without
having to actually install anything:
```
nix-shell -p bundix --run 'bundix'
```
The second step of using nix is to set up a nix expression in the file
`default.nix`. This will actually build the static files. As a bonus I made my
expression to also allow for serving the site locally with dynamic updating
every time I change a source file. My `default.nix` looks like this:
```
{
# pkgs refers to all "builtin" nix pkgs and utilities. By importing from a
# URL I'm able to always pin this default.nix to a specific version of those
# packages.
pkgs ? import (fetchTarball "https://github.com/NixOS/nixpkgs/archive/cd63096d6d887d689543a0b97743d28995bc9bc3.tar.gz") {},
system ? builtins.currentSystem,
}:
let
# bundlerEnv looks for a Gemfile, Gemfile.lock, and gemset.nix inside
# gemdir, and derives a package containing ruby and all desired gems.
ruby_env = pkgs.bundlerEnv {
name = "ruby_env";
ruby = pkgs.ruby;
gemdir = ./.;
};
in
{
# build will derive a package which contains the generated static
# files of the blog. It uses the build.sh file (provided below) to
# do this.
build = derivation {
name = "mediocre-blog";
# The build.sh file (source provided below) is executed in order
# to actually build the site.
builder = "${pkgs.bash}/bin/bash";
args = [ ./build.sh ];
# ruby_env is provided as an input to build.sh so that it can
# use jekyll, and the src directory is provided so it can access
# the blog's source files. system is required by the derivation
# function, and stdenv provides standard utilities to build.sh.
inherit ruby_env system;
src = ./src;
stdenv = pkgs.stdenv;
};
# serve will derive an environment specifically tailored for being
# run in a nix-shell. The resulting shell will have ruby_env
# provided for it, and will automatically run the `jekyll serve`
# command to serve the blog locally.
serve = pkgs.stdenv.mkDerivation {
name = "mediocre-blog-shell";
# glibcLocales is required so to fill in LC_ALL and other locale
# related environment vars. Without those jekyll's scss compiler
# fails.
#
# TODO probably get rid of the scss compiler.
buildInputs = [ ruby_env pkgs.glibcLocales ];
shellHook = ''
exec ${ruby_env}/bin/jekyll serve -s ./src -d ./_site -w -I -D
'';
};
}
```
(Nix is a bit tricky to learn, but I highly recommend chapters 14 and 15 of [the
nix manual][manual] for an overview of the language itself, if nothing else.)
The `build.sh` used by the nix expression to actually generate the static files
looks like this:
```bash
# stdenv was given as a dependency to build.sh, and so build.sh can use it to
# source in utilities like mkdir, which it needs.
source $stdenv/setup
set -e
# Set up the output directory. nix provides the $out variable which will be the
# root of the derived package's filesystem, but for simplicity later we want to
# output the site within /var/www.
d="$out/var/www/blog.mediocregopher.com"
mkdir -p "$d"
# Perform the jekyll build command. Like stdenv the ruby_env was given as a
# dependency to build.sh, so it has to explicitly use it to have access to
# jekyll. src is another explicit dependency which was given to build.sh, and
# contains all the actual source files within the src directory of the repo.
$ruby_env/bin/jekyll build -s "$src" -d "$d"
```
With these pieces in place I can easily regenerate the site like so:
```
nix-build -A build
```
Once run the static files will exist within a symlink called `result` in the
project's root. Within the symlink will be a `var/www/blog.mediocregopher.com`
tree of directories, and within that will be the generated static files, all
without ever having to have installed ruby.
The expression also allows me to serve the blog while I'm working on it. Doing
so looks like this:
```
nix-shell -A serve
```
When run I get a normal jekyll process running in my `src` directory, serving
the site in real-time on port 4000, once again all without ever installing ruby.
As a final touch I introduced a simple `Makefile` to my repo to wrap these
commands, because even these were too much for me to remember:
```
result:
nix-build -A build
install: result
nix-env -i "$$(readlink result)"
clean:
rm result
rm -rf _site
serve:
nix-shell -A serve
update:
nix-shell -p bundler --run 'bundler update; bundler lock; bundix; rm -rf .bundle vendor'
```
We'll look at that `install` target in the next section.
## nginx
So now I have the means to build my site quickly, reliably, and without
cluttering up the rest of my system. Time to actually serve the files.
My home server has a docker network which houses most of my services that I run,
including nginx. nginx's primary job is to listen on ports 80 and 443, accept
HTTP requests, and direct those requests to their appropriate service based on
their `Host` header. nginx is also great at serving static content from disk, so
I'll take advantage of that for the blog.
The one hitch is that nginx is currently running within a docker container,
as are all my other services. Ideally I would:
* Get rid of the nginx docker container.
* Build a nix package containing nginx, all my nginx config files, and the blog
files themselves.
* Run that directly.
Unfortunately extracting nginx from docker is dependent on doing so for all
other services as well, or at least on running all services on the host network,
which I'm not prepared to do yet. So for now I've done something janky.
If you look at the `Makefile` above you'll notice the `install` target. What
that target does is to install the static blog files to my nix profile, which
exists at `$HOME/.nix-profile`. nix allows any package to be installed to a
profile in this way. All packages within a profile are independent and can be
added, updated, and removed atomically. By installing the built blog package to
my profile I make it available at
`$HOME/.nix-profile/var/www/blog.mediocregopher.com`.
So to serve those files via nginx all I need to do is add a read-only volume to
the container...
```
-v $HOME/.nix-profile/var/www/blog.mediocregopher.com:/var/www/blog.mediocregopher.com:ro \
```
...add a new virtual host to my nginx config...
```
server {
listen 80;
server_name blog.mediocregopher.com;
root /var/www/blog.mediocregopher.com;
}
```
...and finally direct the `blog` A record for `mediocregopher.com` to my home
server's IP. Cloudflare will handle TLS on port 443 for me in this case, as well
as hide my home IP, which is prudent.
## Deploying
So now it's time to publish this new post to the blog, what are the actual
steps? It's as easy as:
```
make clean install
```
This will remove any existing `result`, regenerate the site (with the new post)
under a new symlink, and install/update that newer package to my nix profile,
overwriting the previous package which was there.
EDIT: apparently this isn't quite true. Because `$HOME/.nix-profile` is a
symlink, docker doesn't handle the case of that symlink being updated correctly,
so I also have to do `docker restart nginx` for changes to be reflected in
nginx.
And that's it! Nix is a cool tool that I'm still getting the hang of, but
hopefully this post might be useful to anyone else thinking of self-hosting
their site.
[jekyll]: https://jekyllrb.com/
[bundix]: https://github.com/nix-community/bundix
[manual]: https://nixos.org/manual/nix/stable/#chap-writing-nix-expressions

View File

@ -1,390 +0,0 @@
---
title: >-
Building Mobile Nebula
description: >-
Getting my hands dirty with Android development.
series: nebula
tags: tech
---
This post is going to be cheating a bit. I want to start working on adding DNS
resolver configuration to the [mobile nebula][mobile_nebula] app (if you don't
know nebula, [check it out][nebula], it's well worth knowing about), but I also
need to write a blog post for this week, so I'm combining the two exercises.
This post will essentially be my notes from my progress on today's task.
(Protip: listen to [this][heilung] while following along to achieve the proper
open-source programming aesthetic.)
The current mobile nebula app works very well, but it is lacking one major
feature: the ability to specify custom DNS resolvers. This is important because
I want to be able to access resources on my nebula network by their hostname,
not their IP. Android does everything in its power to make DNS configuration
impossible, and essentially the only way to actually accomplish this is by
specifying the DNS resolvers within the app. I go into more details about why
Android is broken [here][dns_issue].
## Setup
Before I can make changes to the app I need to make sure I can correctly build
it in the first place, so that's the major task for today. The first step to
doing so is to install the project's dependencies. As described in the
[mobile_nebula][mobile_nebula] README, the dependencies are:
- [`flutter`](https://flutter.dev/docs/get-started/install)
- [`gomobile`](https://godoc.org/golang.org/x/mobile/cmd/gomobile)
- [`android-studio`](https://developer.android.com/studio)
- [Enable NDK](https://developer.android.com/studio/projects/install-ndk)
It should be noted that as of writing I haven't used any of these tools ever,
and have only done a small amount of android programming, probably 7 or 8 years
ago, so I'm going to have to walk the line between figuring out problems on the
fly and not having to completely learn these entire ecosystems; there are only
so many hours in a weekend, after all.
I'm running [Archlinux][arch] so I install android-studio and flutter by
doing:
```bash
yay -Sy android-studio flutter
```
And I install `gomobile`, according to its [documentation][gomobile] via:
```bash
go get golang.org/x/mobile/cmd/gomobile
gomobile init
```
Now I startup android-studio and go through the setup wizard for it. I choose
standard setup because customized setup doesn't actually offer any interesting
options. Next android-studio spends approximately two lifetimes downloading
dependencies while my eyesight goes blurry because I'm drinking my coffee too
fast.
It's annoying that I need to install these dependencies, especially
android-studio, in order to build this project. A future goal of mine is to nix
this whole thing up, and make a build pipeline where you can provide a full
nebula configuration file and it outputs a custom APK file for that specific
config; zero configuration required at runtime. This will be useful for
lazy/non-technical users who want to be part of the nebula network.
Once android-studio starts up I'm not quite done yet: there's still the NDK
which must be enabled. The instructions given by the link in
[mobile_nebula][mobile_nebula]'s README explain doing this pretty well, but it's
important to install the specific version indicated in the mobile_nebula repo
(`21.0.6113669` at time of writing). Only another 1GB of dependency downloading
to go....
While waiting for the NDK to download I run `flutter doctor` to make sure
flutter is working, and it gives me some permissions errors. [This blog
post][flutter_blog] gives some tips on setting up, and after running the
following...
```bash
sudo groupadd flutterusers
sudo gpasswd -a $USER flutterusers
sudo chown -R :flutterusers /opt/flutter
sudo chmod -R g+w /opt/flutter/
newgrp flutterusers
```
... I'm able to run `flutter doctor`. It gives the following output:
```
[✓] Flutter (Channel stable, 1.22.6, on Linux, locale en_US.UTF-8)
[!] Android toolchain - develop for Android devices (Android SDK version 30.0.3)
✗ Android licenses not accepted. To resolve this, run: flutter doctor --android-licenses
[!] Android Studio
✗ Flutter plugin not installed; this adds Flutter specific functionality.
✗ Dart plugin not installed; this adds Dart specific functionality.
[!] Connected device
! No devices available
! Doctor found issues in 3 categories.
```
The first issue is easily solved as per the instructions given. The second is
solved by finding the plugin manager in android-studio and installing the
flutter plugin (which installs the dart plugin as a dependency, we call that a
twofer).
After installing the plugin the doctor command still complains about not finding
the plugins, but the above mentioned blog post indicates to me that this is
expected. It's comforting to know that the problems indicated by the doctor may
or may not be real problems.
The [blog post][flutter_blog] also indicates that I need `openjdk-8` installed,
so I do:
```bash
yay -S jdk8-openjdk
```
And I use the `archlinux-java` command to confirm that that is indeed the default version for my shell. The [mobile_nebula][mobile_nebula] repo helpfully expects an `env.sh` file to exist in the root, so if openjdk-8 wasn't already the default I could make it so within that file.
## Build
At this point I think I'm ready to try actually building an APK. Thoughts and
prayers required. I run the following in a terminal, since for some reason the
`Build > Flutter > Build APK` dropdown button in android-studio did nothing.
```bash
flutter build apk
```
It takes quite a while to run, but in the end it errors with:
```
make: 'mobileNebula.aar' is up to date.
cp: cannot create regular file '../android/app/src/main/libs/mobileNebula.aar': No such file or directory
FAILURE: Build failed with an exception.
* Where:
Build file '/tmp/src/mobile_nebula/android/app/build.gradle' line: 95
* What went wrong:
A problem occurred evaluating project ':app'.
> Process 'command './gen-artifacts.sh'' finished with non-zero exit value 1
* Try:
Run with --stacktrace option to get the stack trace. Run with --info or --debug option to get more log output. Run with --scan to get full insights.
* Get more help at https://help.gradle.org
BUILD FAILED in 1s
Running Gradle task 'bundleRelease'...
Running Gradle task 'bundleRelease'... Done 1.7s
Gradle task bundleRelease failed with exit code 1
```
I narrow down the problem to the `./gen-artifacts.sh` script in the repo's root,
which takes in either `android` or `ios` as an argument. Running it directly
as `./gen-artifacts.sh android` results in the same error:
```
make: 'mobileNebula.aar' is up to date.
cp: cannot create regular file '../android/app/src/main/libs/mobileNebula.aar': No such file or directory
```
So now I gotta figure out wtf that `mobileNebula.aar` file is. The first thing I
note is that not only is that file not there, but the `libs` directory it's
supposed to be present in is also not there. So I suspect that there's a missing
build step somewhere.
I search for the string `mobileNebula.aar` within the project using
[ag][silver_searcher] and find that it's built by `nebula/Makefile` as follows:
```make
mobileNebula.aar: *.go
gomobile bind -trimpath -v --target=android
```
So that file is made by `gomobile`, good to know! Additionally the file is
actually there in the `nebula` directory, so I suspect there's just a missing
build step to move it into `android/app/src/main/libs`. Via some more `ag`-ing I
find that the code which is supposed to move the `mobileNebula.aar` file is in
the `gen-artifacts.sh` script, but that script doesn't create the `libs` folder
as it ought to. I apply the following diff:
```diff
diff --git a/gen-artifacts.sh b/gen-artifacts.sh
index 601ed7b..4f73b4c 100755
--- a/gen-artifacts.sh
+++ b/gen-artifacts.sh
@@ -16,7 +16,7 @@ if [ "$1" = "ios" ]; then
elif [ "$1" = "android" ]; then
# Build nebula for android
make mobileNebula.aar
- rm -rf ../android/app/src/main/libs/mobileNebula.aar
+ mkdir -p ../android/app/src/main/libs
cp mobileNebula.aar ../android/app/src/main/libs/mobileNebula.aar
else
```
(The `rm -rf` isn't necessary, since a) that file is about to be overwritten by
the subsequent `cp` whether or not it's there, and b) it's just deleting a
single file so the `-rf` is an unnecessary risk).
At this point I re-run `flutter build apk` and receive a new error. Progress!
```
A problem occurred evaluating root project 'android'.
> A problem occurred configuring project ':app'.
> Removing unused resources requires unused code shrinking to be turned on. See http://d.android.com/r/tools/shrink-resources.html for more information.
```
I recall that in the original [mobile_nebula][mobile_nebula] README it mentions
to run the `flutter build` command with the `--no-shrink` option, so I try:
```bash
flutter build apk --no-shrink
```
Finally we really get somewhere. The command takes a very long time to run as it
downloads yet more dependencies (mostly android SDK stuff from the looks of it),
but unfortunately still errors out:
```
Execution failed for task ':app:processReleaseResources'.
> Could not resolve all files for configuration ':app:releaseRuntimeClasspath'.
> Failed to transform mobileNebula-.aar (:mobileNebula:) to match attributes {artifactType=android-compiled-dependencies-resources, org.gradle.status=integration}.
> Execution failed for AarResourcesCompilerTransform: /home/mediocregopher/.gradle/caches/transforms-2/files-2.1/735fc805916d942f5311063c106e7363/jetified-mobileNebula.
> /home/mediocregopher/.gradle/caches/transforms-2/files-2.1/735fc805916d942f5311063c106e7363/jetified-mobileNebula/AndroidManifest.xml
```
Time for more `ag`-ing. I find the file `android/app/build.gradle`, which has
the following block:
```
implementation (name:'mobileNebula', ext:'aar') {
exec {
workingDir '../../'
environment("ANDROID_NDK_HOME", android.ndkDirectory)
environment("ANDROID_HOME", android.sdkDirectory)
commandLine './gen-artifacts.sh', 'android'
}
}
```
I never set up the `ANDROID_HOME` or `ANDROID_NDK_HOME` environment variables,
and I suppose that if I'm running the flutter command outside of android-studio
there wouldn't be a way for flutter to know those values, so I try setting them
within my `env.sh`:
```bash
export ANDROID_HOME=~/Android/Sdk
export ANDROID_NDK_HOME=~/Android/Sdk/ndk/21.0.6113669
```
Re-running the build command still results in the same error. But it occurs to
me that I probably had built the `mobileNebula.aar` without those set
previously, so maybe it was built with the wrong NDK version or something. I
delete `nebula/mobileNebula.aar` and try building again. This time...
new errors! Lots of them! Big ones and small ones!
At this point I'm a bit fed up, and want to try a completely fresh build. I back
up my modified `env.sh` and `gen-artifacts.sh` files, delete the `mobile_nebula`
repo, re-clone it, reinstall those files, and try building again. This time just
a single error:
```
Execution failed for task ':app:lintVitalRelease'.
> Could not resolve all artifacts for configuration ':app:debugRuntimeClasspath'.
> Failed to transform libs.jar to match attributes {artifactType=processed-jar, org.gradle.libraryelements=jar, org.gradle.usage=java-runtime}.
> Execution failed for JetifyTransform: /tmp/src/mobile_nebula/build/app/intermediates/flutter/debug/libs.jar.
> Failed to transform '/tmp/src/mobile_nebula/build/app/intermediates/flutter/debug/libs.jar' using Jetifier. Reason: FileNotFoundException, message: /tmp/src/mobile_nebula/build/app/intermediates/flutter/debug/libs.jar (No such file or directory). (Run with --stacktrace for more details.)
Please file a bug at http://issuetracker.google.com/issues/new?component=460323.
```
So that's cool, apparently there's a bug with flutter and I should file a
support ticket? Well, probably not. It seems that while
`build/app/intermediates/flutter/debug/libs.jar` indeed doesn't exist in the
repo, `build/app/intermediates/flutter/release/libs.jar` _does_, so this appears
to possibly be an issue in declaring which build environment is being used.
After some googling I found [this flutter issue][flutter_issue] related to the
error. Tldr: gradle's not playing nicely with flutter. Downgrading could help,
but apparently building with the `--debug` flag also works. I don't want to
build a release version anyway, so this sits fine with me. I run...
```bash
flutter build apk --no-shrink --debug
```
And would you look at that, I got a result!
```
✓ Built build/app/outputs/flutter-apk/app-debug.apk.
```
## Install
Building was probably the hard part, but I'm not totally out of the woods yet.
Theoretically I could email this apk to my phone or something, but I'd like
something with a faster turnaround time; I need `adb`.
I install `adb` via the `android-tools` package:
```bash
yay -S android-tools
```
Before `adb` will work, however, I need to turn on USB debugging on my phone,
which I do by following [this article][usb_debugging]. Once connected I confirm
that `adb` can talk to my phone by doing:
```bash
adb devices
```
And then, finally, I can install the apk:
```bash
adb install build/app/outputs/flutter-apk/app-debug.apk
```
NOT SO FAST! MORE ERRORS!
```
adb: failed to install build/app/outputs/flutter-apk/app-debug.apk: Failure [INSTALL_FAILED_UPDATE_INCOMPATIBLE: Package net.defined.mobile_nebula signatures do not match previously installed version; ignoring!]
```
I'm guessing this is because I already have the real nebula app installed. I
uninstall it and try again.
AND IT WORKS!!! FUCK YEAH!
```
Performing Streamed Install
Success
```
I can open the nebula app on my phone and it works... fine. There's some
pre-existing networks already installed, which isn't the case for the Play Store
version as far as I can remember, so I suspect those are only there in the
debugging build. Unfortunately the presence of these test networks causes the app to throw a bunch of errors because it can't contact those networks. Oh well.
The presence of those test networks, in a way, is actually a good thing, as it
means there's probably already a starting point for what I want to do: building
a per-device nebula app with a config preloaded into it.
## Further Steps
Beyond continuing on towards my actual goal of adding DNS resolvers to this app,
there's a couple of other paths I could potentially go down at this point.
* As mentioned, nixify the whole thing. I'm 99% sure the android-studio GUI
isn't actually needed at all, and I only used it for installing the CMake and
NDK plugins because I didn't bother to look up how to do it on the CLI.
* Figuring out how to do a proper release build would be great, just for my own
education. Based on the [flutter issue][flutter_issue] it's possible that all
that's needed is to downgrade gradle, but maybe that's not so easy.
* Get an android emulator working so that I don't have to install to my phone
every time I want to test the app out. I'm not sure if that will also work for
the VPN aspect of the app, but it will at least help me iterate on UI changes
faster.
But at this point I'm done for the day, I'll continue on this project some other
time.
[mobile_nebula]: https://github.com/DefinedNet/mobile_nebula
[nebula]: https://slack.engineering/introducing-nebula-the-open-source-global-overlay-network-from-slack/
[dns_issue]: https://github.com/DefinedNet/mobile_nebula/issues/9
[arch]: https://archlinux.org/
[android_wiki]: https://wiki.archlinux.org/index.php/Android#Making_/opt/android-sdk_group-writeable
[heilung]: https://youtu.be/SMJ7pxqk5d4?t=220
[flutter_blog]: https://www.rockyourcode.com/how-to-get-flutter-and-android-working-on-arch-linux/
[gomobile]: https://pkg.go.dev/golang.org/x/mobile/cmd/gomobile
[silver_searcher]: https://github.com/ggreer/the_silver_searcher
[flutter_issue]: https://github.com/flutter/flutter/issues/58247
[usb_debugging]: https://www.droidviews.com/how-to-enable-developer-optionsusb-debugging-mode-on-devices-with-android-4-2-jelly-bean/

View File

@ -1,224 +0,0 @@
---
title: >-
Old Code, New Ideas
description: >-
Looking back at my old code with bemusement and horror.
tags: tech
---
About 3 years ago I put a lot of effort into a set of golang packages called
[mediocre-go-lib][mediocre-go-lib]. The idea was to create a framework around
the ideas I had laid out in [this blog post][program-structure] around the
structure and composability of programs. What I found in using the framework was
that it was quite bulky, not fully thought out, and ultimately difficult for
anyone but me to use. So.... a typical framework then.
My ideas about program structure haven't changed a ton since then, but my ideas
around the patterns which enable that structure have simplified dramatically
(see [my more recent post][component-oriented] for more on that). So in that
spirit I've decided to cut a `v2` branch of `mediocre-go-lib` and start trimming
the fat.
This is going to be an exercise both in deleting old code (very fun) and
re-examining old code which I used to think was good but now know is bad (even
more fun), and I've been looking forward to it for some time.
[mediocre-go-lib]: https://github.com/mediocregopher/mediocre-go-lib
[program-structure]: {% post_url 2019-08-02-program-structure-and-composability %}
[component-oriented]: {% post_url 2020-11-16-component-oriented-programming %}
## mcmp, mctx
The two foundational pieces of `mediocre-go-lib` are the `mcmp` and `mctx`
packages. `mcmp` primarily deals with its [mcmp.Component][component] type,
which is a key/value store which can be used by other packages to store and
retrieve component-level information. Each `mcmp.Component` exists as a node in
a tree of `mcmp.Component`s, and these form the structure of a program.
`mcmp.Component` is able to provide information about its place in that tree as
well (i.e. its path, parents, children, etc...).
If this sounds cumbersome and of questionable utility that's because it is. It's
also not even correct, because a component in a program exists in a DAG, not a
tree. Moreover, each component can keep track of whatever data it needs for
itself using typed fields on a struct. Pretty much all other packages in
`mediocre-go-lib` depend on `mcmp` to function, but they don't _need_ to, I just
designed it that way.
So my plan of attack is going to be to delete `mcmp` completely, and repair all
the other packages.
The other foundational piece of `mediocre-go-lib` is [mctx][mctx]. Where `mcmp`
dealt with arbitrary key/value storage on the component level, `mctx` deals with
it on the contextual level, where each go-routine (i.e. thread) corresponds to a
`context.Context`. The primary function of `mctx` is this one:
```go
// Annotate takes in one or more key/value pairs (kvs' length must be even) and
// returns a Context carrying them.
func Annotate(ctx context.Context, kvs ...interface{}) context.Context
```
I'm inclined to keep this around for now because it will be useful for logging,
but there's one change I'd like to make to it. In its current form the value of
every key/value pair must already exist before being used to annotate the
`context.Context`, but this can be cumbersome in cases where the data you'd want
to annotate is quite hefty to generate but also not necessarily going to be
used. I'd like to have the option to make annotating occur lazily. For this I
add an `Annotator` interface and a `WithAnnotator` function which takes it as an
argument, as well as some internal refactoring to make it all work right:
```go
// Annotations is a set of key/value pairs representing a set of annotations. It
// implements the Annotator interface along with other useful post-processing
// methods.
type Annotations map[interface{}]interface{}
// Annotator is a type which can add annotation data to an existing set of
// annotations. The Annotate method should be expected to be called in a
// non-thread-safe manner.
type Annotator interface {
Annotate(Annotations)
}
// WithAnnotator takes in an Annotator and returns a Context which will produce
// that Annotator's annotations when the Annotations function is called. The
// Annotator will not be evaluated until the first call to Annotations.
func WithAnnotator(ctx context.Context, annotator Annotator) context.Context
```
`Annotator` is designed like it is for two reasons. The more obvious design,
where the method has no arguments and returns a map, would cause a memory
allocation on every invocation, which could be a drag for long chains of
contexts whose annotations are being evaluated frequently. The obvious design
also leaves open questions about whether the returned map can be modified by
whoever receives it. The design given here dodges these problems without any
obvious drawbacks.
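As a quick illustration of the lazy-annotation idea, here's a minimal sketch of a user-defined `Annotator`, using the types and functions declared above. The `requestAnnotator` type and its fields are made up purely for the example:
```go
// requestAnnotator is a hypothetical Annotator which defers computing its
// annotation data until the annotations are actually evaluated, e.g. when a
// log entry referencing the Context finally gets written.
type requestAnnotator struct {
	route string
	body  []byte
}

func (r requestAnnotator) Annotate(aa mctx.Annotations) {
	// None of this work happens when the Annotator is attached to the
	// Context, only when Annotate is called, and writing into the
	// caller-provided map avoids an extra allocation per evaluation.
	aa["route"] = r.route
	aa["requestSizeBytes"] = len(r.body)
}
```
Attaching it is then just `ctx = mctx.WithAnnotator(ctx, requestAnnotator{route: route, body: body})`; nothing is computed until the annotations are actually asked for.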
The original implementation also had this unnecessary `Annotation` type:
```go
// Annotation describes the annotation of a key/value pair made on a Context via
// the Annotate call.
type Annotation struct {
Key, Value interface{}
}
```
I don't know why this was ever needed, as an `Annotation` was never passed into
nor returned from any function. It was part of the type `AnnotationSet`, but
that could easily be refactored into a `map[interface{}]interface{}` instead. So
I factored `Annotation` out completely.
[component]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mcmp#Component
[mctx]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mctx
## mcfg, mrun
The next package to tackle is [mcfg][mcfg], which deals with configuration via
command line arguments and environment variables. The package is set up to use
the old `mcmp.Component` type such that each component could declare its own
configuration parameters in the global configuration. In this way the
configuration would have a hierarchy of its own which matches the component
tree.
Given that I now think `mcmp.Component` isn't the right course of action it
would be the natural step to take that aspect out of `mcfg`, leaving only a
basic command-line and environment variable parser. There are many other basic
parsers of this sort out there, including [one][flagconfig] or [two][lever] I
wrote myself, and frankly I don't think the world needs another. So `mcfg` is
going away.
The [mrun][mrun] package is the corresponding package to `mcfg`; where `mcfg`
dealt with configuration of components `mrun` deals with the initialization and
shutdown of those same components. Like `mcfg`, `mrun` relies heavily on
`mcmp.Component`, and doesn't really have any function with that type gone. So
`mrun` is a goner too.
[mcfg]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mcfg
[mrun]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mrun
[flagconfig]: https://github.com/mediocregopher/flagconfig
[lever]: https://github.com/mediocregopher/lever
## mlog
The [mlog][mlog] package is primarily concerned with, as you might guess,
logging. While there are many useful logging packages out there none of them
integrate with `mctx`'s annotations, so it is useful to have a custom logging
package here. `mlog` also has the nice property of not being extremely coupled
to `mcmp.Component` like other packages; it's only necessary to delete a handful
of global functions which aren't a direct part of the `mlog.Logger` type in
order to free the package from that burden.
With that said, the `mlog.Logger` type could still use some work. Its primary pattern looks like this:
```go
// Message describes a message to be logged.
type Message struct {
Level
Description string
Contexts []context.Context
}
// Info logs an InfoLevel message.
func (l *Logger) Info(descr string, ctxs ...context.Context) {
l.Log(mkMsg(InfoLevel, descr, ctxs...))
}
```
The idea was that if the user has multiple `Contexts` in hand, each one possibly
having some relevant annotations, all of those `Context`s' annotations could be
merged together for the log entry.
Looking back it seems to me that the only thing `mlog` should care about is the
annotations, and not _where_ those annotations came from. So the new pattern
looks like this:
```go
// Message describes a message to be logged.
type Message struct {
	Context context.Context
	Level
	Description string
	Annotators []mctx.Annotator
}
// Info logs a LevelInfo message.
func (l *Logger) Info(ctx context.Context, descr string, annotators ...mctx.Annotator)
```
The annotations on the given `Context` will be included, and then any further
`Annotator`s can be added on. This will leave room for `merr` later.
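To make that concrete, here's a hypothetical example of what a call-site using the new signature might look like; the `handleLogin` function and its arguments are invented for illustration, only the `mctx`/`mlog` signatures come from above:
```go
func handleLogin(ctx context.Context, logger *mlog.Logger, userID string) {
	// Annotations already attached to the Context come along automatically...
	ctx = mctx.Annotate(ctx, "userID", userID)

	// ...and call-site specific annotations can be tacked on as extra
	// Annotators, since Annotations itself implements Annotator.
	logger.Info(ctx, "user logged in", mctx.Annotations{
		"method": "password",
	})
}
```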
There's some other warts in `mlog.Logger` that should be dealt with as well,
including some extraneous methods which were only used due to `mcmp.Component`,
some poorly named types, a message handler which didn't properly clean itself
up, and making `NewLogger` take in parameters with which it can be customized as
needed (previously it only allowed for a single configuration). I've also
extended `Message` to include a timestamp, a namespace field, and some other
useful information.
[mlog]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mlog
## Future Work
I've run out of time for today, but future work on this package includes:
* Updating [merr][merr] with support for `mctx.Annotations`.
* Auditing the [mnet][mnet], [mhttp][mhttp], and [mrpc][mrpc] packages to see if
they contain anything worth keeping.
* Probably deleting the [m][m] package entirely; I don't even really remember
what it does.
* Probably deleting the [mdb][mdb] package entirely; it only makes sense in the
context of `mcmp.Component`.
* Making a difficult decision about [mtest][mtest]; I put a lot of work into it,
but is it really any better than [testify][testify]?
[merr]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/merr
[mnet]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mnet
[mhttp]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mhttp
[mrpc]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mrpc
[m]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/m
[mdb]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mdb
[mtest]: https://pkg.go.dev/github.com/mediocregopher/mediocre-go-lib/mtest
[testify]: https://github.com/stretchr/testify

View File

@ -1,232 +0,0 @@
---
title: >-
Building gomobile Using Nix
description: >-
Harder than I thought it would be!
series: nebula
tags: tech
---
When I last left off with the nebula project I wanted to [nix][nix]-ify the
build process for Cryptic's [mobile_nebula][mobile_nebula] fork. While I've made
progress on the overall build, one particular bit of it really held me up, so
I'm writing about that part here. I'll finish the full build at a later time.
## gomobile
[gomobile][gomobile] is a toolkit for the go programming language to allow for
running go code on Android and iOS devices. `mobile_nebula` uses `gomobile` to
build a simple wrapper around the nebula client that the mobile app can then
hook into.
This means that in order to nix-ify the entire `mobile_nebula` project I first
need to nix-ify `gomobile`, and since there isn't (at time of writing) an
existing package for `gomobile` in the nixpkgs repo, I had to roll my own.
I started with a simple `buildGoModule` nix expression:
```
pkgs.buildGoModule {
pname = "gomobile";
version = "unstable-2020-12-17";
src = pkgs.fetchFromGitHub {
owner = "golang";
repo = "mobile";
rev = "e6ae53a27f4fd7cfa2943f2ae47b96cba8eb01c9";
sha256 = "03dzis3xkj0abcm4k95w2zd4l9ygn0rhkj56bzxbcpwa7idqhd62";
};
vendorSha256 = "1n1338vqkc1n8cy94501n7jn3qbr28q9d9zxnq2b4rxsqjfc9l94";
}
```
The basic idea here is that `buildGoModule` will acquire a specific revision of
the `gomobile` source code from github, then attempt to build it. However,
`gomobile` is a special beast in that it requires a number of C/C++ libraries in
order to be built. I discovered this upon running this expression, when I
received this error:
```
./work.h:12:10: fatal error: GLES3/gl3.h: No such file or directory
12 | #include <GLES3/gl3.h> // install on Ubuntu with: sudo apt-get install libegl1-mesa-dev libgles2-mesa-dev libx11-dev
```
This stumped me for a bit, as I couldn't figure out a) the "right" place to
source the `GLES3` header file from, and b) how to properly hook that into the
`buildGoModule` expression. My initial attempts involved trying to include
versions of the header file from my `androidsdk` nix package which I had already
gotten (mostly) working, but the version which ships there appears to expect to
be using clang. `cgo` (go's compiler which is used for C/C++ interop) only
supports gcc, so that strategy failed.
I didn't like having to import the header file from `androidsdk` anyway, as it
meant that my `gomobile` would only work within the context of the
`mobile_nebula` project, rather than being a standalone utility.
## nix-index
At this point I flailed around some more trying to figure out where to get this
header file from. Eventually I stumbled on the [nix-index][nix-index] project,
which implements something similar to the `locate` utility on linux: you give it
a file pattern, and it searches your active nix channels for any packages which
provide a file matching that pattern.
Since nix is amazing it's not actually necessary to install `nix-index`; I
simply start up a shell with the package available using `nix-shell -p
nix-index`. On first run I needed to populate the index by running the
`nix-index` command, which took some time, but after that finding packages which
provide the file I need is as easy as:
```
> nix-shell -p nix-index
[nix-shell:/tmp]$ nix-locate GLES3/gl3.h
(zulip.out) 82,674 r /nix/store/wbfw7w2ixdp317wip77d4ji834v1k1b9-libglvnd-1.3.2-dev/include/GLES3/gl3.h
libglvnd.dev 82,674 r /nix/store/pghxzmnmxdcarg5bj3js9csz0h85g08m-libglvnd-1.3.2-dev/include/GLES3/gl3.h
emscripten.out 82,666 r /nix/store/x3c4y2h5rn1jawybk48r6glzs1jl029s-emscripten-2.0.1/share/emscripten/system/include/GLES3/gl3.h
```
So my mystery file is provided by a few packages, but `libglvnd.dev` stood out
to me as it's also the pacman package which provides the same file in my real
operating system:
```
> yay -Qo /usr/include/GLES3/gl3.h
/usr/include/GLES3/gl3.h is owned by libglvnd 1.3.2-1
```
This gave me some confidence that this was the right track.
## cgo
My next fight was with `cgo` itself. Go's build process provides a few different
entry points for C/C++ compiler/linker flags, including both environment
variables and command-line arguments. But I wasn't using `go build` directly; instead I was working through nix's `buildGoModule` wrapper. This added a huge
layer of confusion as all of nixpkgs is pretty terribly documented, so you
really have to just divine behavior from the [source][buildGoModule-source]
(good luck).
After lots of debugging (hint: `NIX_DEBUG=1`) I determined that all which is
actually needed is to set the `CGO_CFLAGS` variable within the `buildGoModule`
arguments. This would translate to the `CGO_CFLAGS` environment variable being
set during all internal commands, and whatever `go build` commands get used
would pick up my compiler flags from that.
My new nix expression looked like this:
```
pkgs.buildGoModule {
pname = "gomobile";
version = "unstable-2020-12-17";
src = pkgs.fetchFromGitHub {
owner = "golang";
repo = "mobile";
rev = "e6ae53a27f4fd7cfa2943f2ae47b96cba8eb01c9";
sha256 = "03dzis3xkj0abcm4k95w2zd4l9ygn0rhkj56bzxbcpwa7idqhd62";
};
vendorSha256 = "1n1338vqkc1n8cy94501n7jn3qbr28q9d9zxnq2b4rxsqjfc9l94";
CGO_CFLAGS = [
"-I ${pkgs.libglvnd.dev}/include"
];
}
```
Running this produced a new error. Progress! The new error was:
```
/nix/store/p792j5f44l3f0xi7ai5jllwnxqwnka88-binutils-2.31.1/bin/ld: cannot find -lGLESv2
collect2: error: ld returned 1 exit status
```
So pretty similar to the previous issue, but this time it was the linker failing to find a library file rather than the compiler failing to find a header file. Once again I
used `nix-index`'s `nix-locate` command to find that this library file is
provided by the `libglvnd` package (as opposed to `libglvnd.dev`, which provided
the header file).
Adding `libglvnd` to the `CGO_CFLAGS` did not work, as it turns out that flags
for the linker `cgo` uses get passed in via `CGO_LDFLAGS` (makes sense). After
adding this new variable I got yet another error; this time `X11/Xlib.h` could not be found. I repeated the process of `nix-locate`/add to `CGO_*FLAGS` a
few more times until all dependencies were accounted for. The new nix expression
looked like this:
```
pkgs.buildGoModule {
pname = "gomobile";
version = "unstable-2020-12-17";
src = pkgs.fetchFromGitHub {
owner = "golang";
repo = "mobile";
rev = "e6ae53a27f4fd7cfa2943f2ae47b96cba8eb01c9";
sha256 = "03dzis3xkj0abcm4k95w2zd4l9ygn0rhkj56bzxbcpwa7idqhd62";
};
vendorSha256 = "1n1338vqkc1n8cy94501n7jn3qbr28q9d9zxnq2b4rxsqjfc9l94";
CGO_CFLAGS = [
"-I ${pkgs.libglvnd.dev}/include"
"-I ${pkgs.xlibs.libX11.dev}/include"
"-I ${pkgs.xlibs.xorgproto}/include"
"-I ${pkgs.openal}/include"
];
CGO_LDFLAGS = [
"-L ${pkgs.libglvnd}/lib"
"-L ${pkgs.xlibs.libX11}/lib"
"-L ${pkgs.openal}/lib"
];
}
```
## Tests
The `CGO_*FLAGS` variables took care of all compiler/linker errors, but there
was one issue left: `buildGoModule` apparently runs the project's tests after
the build phase. `gomobile`'s tests were actually mostly passing, but some
failed due to trying to copy files around, which nix was having none of. After
some more [buildGoModule source][buildGoModule-source] divination I found that
if I passed an empty `checkPhase` argument it would skip the check phase, and
therefore skip running these tests.
## Fin!
The final nix expression looks like so:
```
pkgs.buildGoModule {
pname = "gomobile";
version = "unstable-2020-12-17";
src = pkgs.fetchFromGitHub {
owner = "golang";
repo = "mobile";
rev = "e6ae53a27f4fd7cfa2943f2ae47b96cba8eb01c9";
sha256 = "03dzis3xkj0abcm4k95w2zd4l9ygn0rhkj56bzxbcpwa7idqhd62";
};
vendorSha256 = "1n1338vqkc1n8cy94501n7jn3qbr28q9d9zxnq2b4rxsqjfc9l94";
CGO_CFLAGS = [
"-I ${pkgs.libglvnd.dev}/include"
"-I ${pkgs.xlibs.libX11.dev}/include"
"-I ${pkgs.xlibs.xorgproto}/include"
"-I ${pkgs.openal}/include"
];
CGO_LDFLAGS = [
"-L ${pkgs.libglvnd}/lib"
"-L ${pkgs.xlibs.libX11}/lib"
"-L ${pkgs.openal}/lib"
];
checkPhase = "";
}
```
Once I complete the nix-ification of `mobile_nebula` I'll submit a PR to the
nixpkgs upstream with this, so that others can have `gomobile` available as
well!
[nix]: https://nixos.org/manual/nix/stable/
[mobile_nebula]: https://github.com/cryptic-io/mobile_nebula
[gomobile]: https://github.com/golang/mobile
[nix-index]: https://github.com/bennofs/nix-index
[buildGoModule-source]: https://github.com/NixOS/nixpkgs/blob/26117ed4b78020252e49fe75f562378063471f71/pkgs/development/go-modules/generic/default.nix

View File

@ -1,18 +0,0 @@
---
title: >-
Married!
description: >-
We did it!
---
Just us, an aspen grove, and a photographer to witness. Between weather and
foot-traffic on the trail everything went as well as we hoped it would; it was a
wonderful day.
{% include image.html dir="wedding" file="1.jpg" width=4005 %}
{% include image.html dir="wedding" file="2.jpg" width=4004 %}
{% include image.html dir="wedding" file="3.jpg" width=4005 %}
More pictures coming soon to a website near you!

View File

@ -1,195 +0,0 @@
---
title: >-
Conditionals in Ginger
description: >-
Some different options for how "if" statements could work.
series: ginger
tags: tech
---
In the [last ginger post][last] I covered a broad overview of how I envisioned
ginger would work as a language, but there were two areas where I felt there was
some uncertainty: conditionals and loops. In this post I will be focusing on
conditionals, and going over a couple of options for how they could work.
[last]: {% post_url 2021-01-09-ginger %}
## Preface
By "conditional" I'm referring to what programmers generally know as the "if"
statement; some mechanism by which code can do one thing or another based on
circumstances at runtime. Without some form of a conditional a programming
language is not Turing-complete and can't be used for anything interesting.
It's uncommon to have a loop without some kind of a conditional inside of it (usually to exit the loop), but quite common to have a conditional with no loop in sight, so it makes more sense to cover conditionals before loops. Whatever decision is reached regarding conditionals will impact
how loops work, but not necessarily the other way around.
For the duration of this post I will be attempting to construct a simple
operation which takes two integers as arguments. If the first is less than
the second then the operation returns the addition of the two, otherwise the
operation returns the second subtracted from the first. In `go` this operation
would look like:
```go
func op(a, b int) int {
if a < b {
return a + b
}
return b - a
}
```
## Pattern 1: Branches As Inputs
The pattern I'll lay out here is simultaneously the first pattern which came to
me when trying to figure this problem out, the pattern which is most like
existing mainstream programming languages, and (in my opinion) the worst pattern
of the bunch. Here is what it looks like:
```
in -lt-> } -if-> out
}
in -add-> }
}
in -1-> } }
in -0-> } -sub-> }
```
The idea here is that the operation `if` could take a 3-tuple whose elements
are, respectively: a boolean, and two other edges which won't be evaluated until
`if` is evaluated. If the boolean is true then `if` outputs the output of the
first edge (the second element in the tuple), and otherwise it will output the
value of the second edge.
This idea doesn't work for a couple reasons. The biggest is that, if there were
multiple levels of `if` statements, the structure of the graph would grow out _leftward_, whereas the flow of data is rightwards. For someone reading the code
to know what `if` will produce in either case they must first backtrack through
the graph, find the origin of that branch, then track that leftward once again
to the `if`.
The other reason this doesn't work is because it doesn't jive with any pattern
for loops I've come up with. This isn't evident from this particular example,
but consider what this would look like if either branch of the `if` needed to
loop back to a previous point in the codepath. If that's a difficult or
confusing task for you, you're not alone.
## Pattern 2: Pattern Matching
There's quite a few languages with pattern matching, and even one which I know
of (erlang) where pattern matching is the primary form of conditionals, and the
more common `if` statement is just some syntactic sugar on top of the pattern
matching.
I've considered pattern matching for ginger. It might look something like:
{% raw %}
```
in -> } -switch-> } -> {{{A, B}, _}, ({A,B}-lt->out)} -0-> } -add-> out
in -1-> } -> } } -1-> } -sub-> out
in -0-> }
```
{% endraw %}
The `switch` operation posits that a node can have multiple output edges. In a
graph this is fine, but it's worth noting. Graphs tend to be implemented such
that edges to and from a node are unordered, but in ginger it seems unlikely
that that will be the case.
The last output edge from the switch is the easiest to explain: it outputs the
input value to `switch` when no other branches are able to be taken. But the
input to `switch` is a bit complex in this example: It's a 2-tuple whose first
element is `in`, and whose second element is `in` but with reversed elements.
In the last output edge we immediately pipe into a `1` operation to retrieve
that second element and call `sub` on that, since that's the required behavior
of the example.
All other branches (in this switch there is only one, the first branch) output
to a value. The form of this value is a tuple (denoted by enclosed curly braces
here) of two values. The first value is the pattern itself, and the second is an
optional predicate. The pattern in this example will match a 2-tuple, ignoring
the second element in that tuple. The first element will itself be matched
against a 2-tuple, and assign each element to the variables `A` and `B`,
respectively. The second element in the tuple, the predicate, is a sub-graph
which returns a boolean, and can be used for further specificity which can't be
covered by the pattern matching (in this case, comparing the two values to each
other).
The output from any of `switch`'s branches is the same as its input value, the
only question is which branch is taken. This means that there's no backtracking
when reading a program using this pattern; no matter where you're looking you
will only have to keep reading rightward to come to an `out`.
There's a few drawbacks with this approach. The first is that it's not actually
very easy to read. While pattern matching can be a really nice feature in
languages that design around it, I've never seen it used in a LISP-style
language where the syntax denotes actual datastructures, and I feel that in such
a context it's a bit unwieldy. I could be wrong.
The second drawback is that pattern matching is not simple to implement, and I'm
not even sure what it would look like in a language where graphs are the primary
datastructure. In the above example we're only matching into a tuple, but how
would you format the pattern for a multi-node, multi-edge graph? Perhaps it's
possible. But given that any such system could be implemented as a macro on top
of normal `if` statements, rather than doing it the other way around, it seems
better to start with the simpler option.
(I haven't talked about it yet, but I'd like for ginger to be portable to
multiple backends (i.e. different processor architectures, vms, etc). If the
builtins of the language are complex, then doing this will be a difficult task,
whereas if I'm conscious of that goal during design I think it can be made to be
very simple. In that light I'd prefer to not require pattern matching to be a
builtin.)
The third drawback is that the input to the `switch` requires careful ordering,
especially in cases like this one where a different value is needed depending on
which branch is taken. I don't consider this to be a huge drawback, as it encourages good data design and is a common consideration in other functional
languages.
## Pattern 3: Branches As Outputs
Taking a cue from the pattern matching example, we can go back to `if` and take
advantage of multiple output edges being a possibility:
```
in -> } -> } -if-> } -0-> } -add-> out
in -1-> } -> } } } -1-> } -sub-> out
in -0-> } }
}
in -lt-> }
```
It's not perfect, but I'd say this is the nicest of the three options so far.
`if` is an operation which takes a 2-tuple. The second element of the tuple is a boolean; if the boolean is true then `if` passes the first element of its tuple to the first branch, otherwise it passes it to the second. In this way `if`
becomes kind of like a fork in a train track: it accepts some payload (the first
element of its input tuple) and depending on conditions (the second element) it
directs the payload one way or the other.
This pattern retains the benefits of the pattern matching example, where one
never needs to backtrack in order to understand what is about to happen next,
while also being much more readable and simpler to implement. It also retains
one of the drawbacks of the pattern matching example, in that the inputs to `if`
must be carefully organized based on the needs of the output branches. As
before, I don't consider this to be a huge drawback.
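For anyone who thinks better in code than in ASCII graphs, here's a loose Go analogy of the routing behavior described above. This is not ginger, just a sketch of the semantics: the conditional receives a payload and a boolean, and forwards the payload down exactly one of its two output branches. The payload mirrors the example graph, carrying both orderings of the input.
```go
// ifOp models the "branches as outputs" conditional: it takes a payload and a
// boolean, and passes the payload to exactly one of two branches.
func ifOp(payload [2][2]int, cond bool, branch0, branch1 func([2][2]int) int) int {
	if cond {
		return branch0(payload)
	}
	return branch1(payload)
}

// op mirrors the example graph: element 0 of the payload is {a, b} (used by
// the add branch) and element 1 is {b, a} (used by the sub branch).
func op(a, b int) int {
	payload := [2][2]int{{a, b}, {b, a}}
	return ifOp(payload, a < b,
		func(p [2][2]int) int { return p[0][0] + p[0][1] }, // true branch: add
		func(p [2][2]int) int { return p[1][0] - p[1][1] }, // false branch: sub
	)
}
```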
There's other modifications which might be made to this `if` to make it even
cleaner, e.g. one could make it accept a 3-tuple, rather than a 2-tuple, in
order to supply differing values to be used depending on which branch is taken.
To me these sorts of small niceties are better left to be implemented as macros,
built on top of a simpler but less pleasant builtin.
## Fin
If you have other ideas around how conditionals might be done in a graph-based
language please [email me][email]; any and all contributions are welcome! One
day I'll get around to actually implementing some of ginger, but today is not
that day.
[email]: mailto:mediocregopher@gmail.com

View File

@ -1,195 +0,0 @@
---
title: >-
Conditionals in Ginger, Errata
description: >-
Too clever by half.
series: ginger
tags: tech
---
After publishing the last post in the series I walked away from my computer
feeling that I was very clever and had made a good post. This was incorrect.
To summarize [the previous post][prev], it's not obvious which is the best way
to structure conditionals in a graphical programming language. My favorite
solution looked something like this:
```
in -> } -> } -if-> } -0-> } -add-> out
in -1-> } -> } } } -1-> } -sub-> out
in -0-> } }
}
in -lt-> }
```
Essentially an `if` operator which accepts a value and a boolean, and which has
two output edges. If the boolean is true then the input value is sent along the
first output edge, and if it's false it's sent along the second.
This structure is not possible, given the properties of ginger graphs that have
been laid out in [other posts in the series][other].
## Nodes, Tuples, and Edges
A ginger graph, as it has been presented so far, is composed of these three
elements. A node has a value, and its value is unique to the graph; if two nodes
have the same value then they are the same node. Edges connect two nodes or
tuples together, and have a value and direction. A tuple is, in essence, a node whose value is its input edges.
The `if` operation above lies on an edge, not a node or tuple. It cannot have
multiple output edges, since it cannot have any edges at all. It _is_ an edge.
So it's back to the drawing board, to some extent. But luckily I've got some
more ideas in my back pocket.
## Forks and Junctions
In an older conception of ginger there was no tuple, but instead there were
forks and junctions. A junction was essentially the same as a tuple, just named
differently: a node whose value is its input edges. A fork was just the
opposite, a node whose value is its output edges. Junctions and forks naturally
complemented each other, but ultimately I didn't find forks to be useful for
much because there weren't cases where it was necessary to have a single edge be
split across multiple output edges directly; any case which appeared to require
a fork could be satisfied by directing the edge into a 1-tuple and using the
output edges of the 1-tuple.
But now we have such a case. The 1-tuple won't work, because the `if` operator
would only see the 1-tuple, not its edges. It could be supposed that the graph
interpreter could say that an `if` operation must be followed by a 1-tuple, and
that the 1-tuple's output edges have a special meaning in that circumstance. But
making the output edges of a 1-tuple have different meaning in different
circumstances isn't very elegant.
So a fork might be just the thing here. For the example I will represent a
fork as the opposite of a tuple: a vertical column of `{` characters.
```
in -> } -> } -if-> { -0-> } -add-> out
in -1-> } -> } } { -1-> } -sub-> out
in -0-> } }
}
in -lt-> }
```
It _looks_ elegant, which is nice. I am curious though if there's any other
possible use-case where a fork might be useful... if there's not then it seems
odd to introduce an entire new element just to support a single operation. Why
not just make that operation itself the new element?
## Switch it Up
In most conceptions of a flowchart that I've seen a conditional is usually
represented as a node with a different shape than the other nodes (often a
diamond). Ginger could borrow this idea for itself, and declare a new graph
element, alongside nodes, tuples, and edges, called a switch.
Let's say a switch is simply represented by a `-<>`, and acts like a node in all
aspects except that it has no value and is not unique to the graph.
The example presented in the [previous post][prev] would look something like
this:
```
in -> } -> } -<> -0-> } -add-> out
in -1-> } -> } } -1-> } -sub-> out
in -0-> } }
}
in -lt-> }
```
This isn't the _worst_. Like the fork it's adding a new element, but that
element's existence is required and its usage is very specific to that
requirement, whereas the fork's existence is required but ambiguously useful
outside of that requirement.
On the other hand, there are macros to consider...
## Macrophillic
Ginger will certainly support macros, and as alluded to in the last post I'd
like even conditional operations to be fair game for those who want to construct
their own more complex operators. In the context of the switch `-<>` element,
would someone be able to create something like a pattern matching conditional?
If the builtin conditional is implemented as a new graph element then it seems
that the primary way to implement a custom conditional macro will also involve a
new graph element.
While I'm not flat out opposed to allowing for custom graph elements, I'm
extremely skeptical that it's necessary, and would like it to be proven
necessary before considering it. So if we can have a basic conditional, _and_
custom conditional macros built on top of the same broadly useful element, that
seems like the better strategy.
So all of that said, it seems I'm leaning towards forks as the better strategy
in this. But I'd like a different name. "Fork" was nice as being the complement
of a "junction", but I like "tuple" way more than "junction" because the term
applies well both to the structural element _and_ to the transformation that
element performs (i.e. a tuple element combines its input edges' values into a
tuple value). But "tuple" and "fork" seem weird together...
## Many Minutes Later...
A brief search of the internet reveals no better word than "fork". A place
where a tree's trunk splits into two separate trunks is called a "fork". A
place where a river splits into two separate rivers is called a "fork".
Similarly with roads. And that _is_ what's happening, from the point of view of
the graph's structure: it is an element whose only purpose is to denote multiple
outward edges.
So "fork" it is.
## Other considerations
A 1-tuple is interesting in that it acts essentially as a concatenation of two
edges. A 1-fork could, theoretically, do the same thing:
```
a -foo-> } -bar-> b
c -far-> { -boo-> d
```
The top uses a tuple, the bottom a fork. Each is, conceptually, valid, but I
don't like that two different elements can be used for the exact same use-case.
A 1-tuple is an established concept in data structures, so I am loath to give it
up. A 1-fork, on the other hand, doesn't make sense structurally (would you
point to any random point on a river and call it a "1-fork"?), and fork as a
whole doesn't really have any analog in the realm of data structures. So I'm
prepared to declare 1-forks invalid from the viewpoint of the language
interpreter.
Another consideration: I already expect that there's going to be confusion as to
when to use a fork and when to use multiple outputs from a node. For example,
here's a graph which uses a fork:
```
a -> { -op1-> foo
{ -op2-> bar
```
and here's a graph which has multiple outputs from the same node:
```
a -op1-> foo
-op2-> bar
```
Each could be interpreted to mean the same thing: "set `foo` to the result of
passing `a` into `op1`, and set `bar` to the result of passing `a` into `op2`."
As with the 1-tuple vs 1-fork issue, we have another case where the same
task might be accomplished with two different patterns. This case is trickier
though, and I don't have as confident an answer.
I think an interim rule which could be put in place, subject to review later, is
that multiple edges from a node or tuple indicate that that same value is being
used for multiple operations, while a fork indicates something specific to the
operation on its input edge. It's not a pretty rule, but I think it will do.
Stay tuned for next week when I realize that actually all of this is wrong and
we start over again!
[prev]: {% post_url 2021-03-01-conditionals-in-ginger %}
[other]: {% post_url 2021-01-09-ginger %}

View File

@ -1,311 +0,0 @@
---
title: >-
Ripple: A Game
description: >-
Hop Till You Drop!
tags: tech
series: ripple
---
<p>
<b>Movement:</b> Arrow keys or WASD<br/>
<b>Jump:</b> Space<br/>
<b>Goal:</b> Jump as many times as possible without touching a ripple!<br/>
<br/>
<b>Press Jump To Begin!</b>
</p>
<canvas id="canvas"
style="border:1px dashed #AAA"
tabindex=0>
Your browser doesn't support canvas. At this point in the world that's actually
pretty cool, well done!
</canvas>
<button onclick="resetGame()">(R)eset</button>
<span style="font-size: 2rem; margin-left: 1rem;">Score:
<span style="font-weight: bold" id="score">0</span>
</span>
<script type="text/javascript">
const palette = [
"#264653",
"#2A9D8F",
"#E9C46A",
"#F4A261",
"#E76F51",
];
const width = 800;
const height = 600;
function hypotenuse(w, h) {
return Math.sqrt(Math.pow(w, 2) + Math.pow(h, 2));
}
let canvas = document.getElementById("canvas");
canvas.width = width;
canvas.height = height;
let score = document.getElementById("score");
const whitelistedKeys = {
"ArrowUp": {},
"KeyW": {map: "ArrowUp"},
"ArrowLeft": {},
"KeyA": {map: "ArrowLeft"},
"ArrowRight": {},
"KeyD": {map: "ArrowRight"},
"ArrowDown": {},
"KeyS": {map: "ArrowDown"},
"Space": {},
"KeyR": {},
};
let keyboard = {};
canvas.addEventListener('keydown', (event) => {
let keyInfo = whitelistedKeys[event.code];
if (!keyInfo) return;
let code = event.code;
if (keyInfo.map) code = keyInfo.map;
event.preventDefault();
keyboard[code] = true;
});
canvas.addEventListener('keyup', (event) => {
let keyInfo = whitelistedKeys[event.code];
if (!keyInfo) return;
let code = event.code;
if (keyInfo.map) code = keyInfo.map;
event.preventDefault();
delete keyboard[code];
});
let ctx = canvas.getContext("2d");
let currTick;
let drops;
class Drop {
constructor(x, y, bounces, color) {
this.tick = currTick;
this.x = x;
this.y = y;
this.thickness = (bounces+1) * 0.25;
this.color = color ? color : palette[Math.floor(Math.random() * palette.length)];
this.winner = false;
this.maxRadius = hypotenuse(x, y);
this.maxRadius = Math.max(this.maxRadius, hypotenuse(width-x, y));
this.maxRadius = Math.max(this.maxRadius, hypotenuse(x, height-y));
this.maxRadius = Math.max(this.maxRadius, hypotenuse(width-x, height-y));
drops.push(this);
if (bounces > 0) {
new Drop(x, -y, bounces-1, this.color);
new Drop(-x, y, bounces-1, this.color);
new Drop((2*width)-x, y, bounces-1, this.color);
new Drop(x, (2*height)-y, bounces-1, this.color);
}
}
radius() { return currTick - this.tick; }
draw() {
ctx.beginPath();
ctx.arc(this.x, this.y, this.radius(), 0, Math.PI * 2, false);
ctx.closePath();
ctx.lineWidth = this.thickness;
ctx.strokeStyle = this.winner ? "#FF0000" : this.color;
ctx.stroke();
}
canGC() {
return this.radius() > this.maxRadius;
}
}
const playerRadius = 10;
const playerMoveAccel = 0.5;
const playerMoveDecel = 0.7;
const playerMaxMoveSpeed = 4;
const playerJumpSpeed = 0.08;
const playerMaxHeight = 1;
const playerGravity = 0.01;
class Player{
constructor(x, y, color) {
this.x = x;
this.y = y;
this.z = 0;
this.xVelocity = 0;
this.yVelocity = 0;
this.zVelocity = 0;
this.color = color;
this.falling = false;
this.lastJumpHeight = 0;
this.loser = false;
}
act() {
if (keyboard["ArrowUp"]) {
this.yVelocity = Math.max(-playerMaxMoveSpeed, this.yVelocity - playerMoveAccel);
} else if (keyboard["ArrowDown"]) {
this.yVelocity = Math.min(playerMaxMoveSpeed, this.yVelocity + playerMoveAccel);
} else if (this.yVelocity > 0) {
this.yVelocity = Math.max(0, this.yVelocity - playerMoveDecel);
} else if (this.yVelocity < 0) {
this.yVelocity = Math.min(0, this.yVelocity + playerMoveDecel);
}
this.y += this.yVelocity;
this.y = Math.max(0+playerRadius, this.y);
this.y = Math.min(height-playerRadius, this.y);
if (keyboard["ArrowLeft"]) {
this.xVelocity = Math.max(-playerMaxMoveSpeed, this.xVelocity - playerMoveAccel);
} else if (keyboard["ArrowRight"]) {
this.xVelocity = Math.min(playerMaxMoveSpeed, this.xVelocity + playerMoveAccel);
} else if (this.xVelocity > 0) {
this.xVelocity = Math.max(0, this.xVelocity - playerMoveDecel);
} else if (this.xVelocity < 0) {
this.xVelocity = Math.min(0, this.xVelocity + playerMoveDecel);
}
this.x += this.xVelocity;
this.x = Math.max(0+playerRadius, this.x);
this.x = Math.min(width-playerRadius, this.x);
let jumpHeld = keyboard["Space"];
if (jumpHeld && !this.falling && this.z < playerMaxHeight) {
this.lastJumpHeight = 0;
this.zVelocity = playerJumpSpeed;
} else {
this.zVelocity = Math.max(-playerJumpSpeed, this.zVelocity - playerGravity);
this.falling = this.z > 0;
}
let prevZ = this.z;
this.z = Math.max(0, this.z + this.zVelocity);
this.lastJumpHeight = Math.max(this.z, this.lastJumpHeight);
}
draw() {
let y = this.y - (this.z * 40);
let radius = playerRadius * (this.z+1)
// draw main
ctx.beginPath();
ctx.arc(this.x, y, radius, 0, Math.PI * 2, false);
ctx.closePath();
ctx.lineWidth = 0;
ctx.fillStyle = this.color;
ctx.fill();
if (this.loser) {
ctx.strokeStyle = '#FF0000';
ctx.lineWidth = 2;
ctx.stroke();
}
// draw shadow, if in the air
if (this.z > 0) {
let radius = Math.max(0, playerRadius * (1.2 - this.z));
ctx.beginPath();
ctx.arc(this.x, this.y, radius, 0, Math.PI * 2, false);
ctx.closePath();
ctx.lineWidth = 0;
ctx.fillStyle = this.color+"33";
ctx.fill();
}
}
}
let player;
let gameState;
let numJumps;
function resetGame() {
currTick = 0;
drops = [];
player = new Player(width/2, height/2, palette[0]);
gameState = 'play';
numJumps = 0;
canvas.focus();
}
resetGame();
let requestAnimationFrame =
window.requestAnimationFrame ||
window.mozRequestAnimationFrame ||
window.webkitRequestAnimationFrame ||
window.msRequestAnimationFrame;
function doTick() {
if (keyboard['KeyR']) {
resetGame();
}
if (gameState == 'play') {
let playerPrevZ = player.z;
player.act();
if (playerPrevZ > 0 && player.z == 0) {
let bounces = Math.floor((player.lastJumpHeight*1.8)+1);
console.log("spawning drop with bounces:", bounces);
new Drop(player.x, player.y, bounces);
} else if (playerPrevZ == 0 && player.z > 0) {
numJumps++;
}
score.innerHTML = numJumps;
if (player.z == 0) {
for (let i in drops) {
let drop = drops[i];
let dropRadius = drop.radius();
if (dropRadius < playerRadius * 1.5) continue;
let hs = Math.pow(drop.x-player.x, 2) + Math.pow(drop.y-player.y, 2);
if (hs > Math.pow(playerRadius + dropRadius, 2)) {
continue;
} else if (Math.sqrt(hs) <= Math.abs(dropRadius-playerRadius)) {
continue;
} else {
console.log("game over");
drop.winner = true;
player.loser = true;
gameState = 'gameOver';
}
}
}
}
drops = drops.filter(drop => !drop.canGC());
ctx.clearRect(0, 0, canvas.width, canvas.height);
drops.forEach(drop => drop.draw());
player.draw()
if (gameState == 'play') currTick++;
requestAnimationFrame(doTick);
}
requestAnimationFrame(doTick);
</script>
_Do you have the patience to wait<br/>
till your mud settles and the water is clear?_
## Backstory
This is a game I originally implemented in lua, which you can find [here][orig].
It's a fun concept that I wanted to show off again, as well as to see if I could
whip it up in an evening in javascript (I can!).
Send me your high scores! I top out around 17.
[orig]: https://github.com/mediocregopher/ripple

View File

@ -1,227 +0,0 @@
---
title: >-
A Simple Rule for Better Errors
description: >-
...and some examples of the rule in action.
tags: tech
---
This post will describe a simple rule for writing error messages that I've
been using for some time and have found to be worthwhile. Using this rule I can
be sure that my errors are propagated upwards with everything needed to debug
problems, while not containing tons of extraneous or duplicate information.
This rule is not specific to any particular language, pattern of error
propagation (e.g. exceptions, signals, simple strings), or method of embedding
information in errors (e.g. key/value pairs, formatted strings).
I do not claim to have invented this system, I'm just describing it.
## The Rule
Without more ado, here's the rule:
> A function sending back an error should not include information the caller
> could already know.
Pretty simple, really, but the best rules are. Keeping to this rule will result
in error messages which, once propagated up to their final destination (usually
some kind of logger), will contain only the information relevant to the error
itself, with minimal duplication.
This rule works in tandem with good encapsulation of function behavior. The caller of a function knows only the inputs to the function and, in
general terms, what the function is going to do with those inputs. If the
returned error only includes information outside of those two things then the
caller knows everything it needs to know about the error, and can continue on to
propagate that error up the stack (with more information tacked on if necessary)
or handle it in some other way.
## Examples
(For examples I'll use Go, but as previously mentioned this rule will be useful
in any other language as well.)
Let's go through a few examples, to show the various ways that this rule can
manifest in actual code.
**Example 1: Nothing to add**
In this example we have a function which merely wraps a call to `io.Copy` for
two files:
```go
func copyFile(dst, src *os.File) error {
_, err := io.Copy(dst, src)
return err
}
```
In this example there's no need to modify the error from `io.Copy` before
returning it to the caller. What would we even add? The caller already knows
which files were involved in the error, and that the error was encountered
during some kind of copy operation (since that's what the function says it
does), so there's nothing more to say about it.
**Example 2: Annotating which step an error occurs at**
In this example we will open a file, read its contents, and return them as a
string:
```go
func readFile(path string) (string, error) {
f, err := os.Open(path)
if err != nil {
return "", fmt.Errorf("opening file: %w", err)
}
defer f.Close()
contents, err := io.ReadAll(f)
if err != nil {
return "", fmt.Errorf("reading contents: %w", err)
}
return string(contents), nil
}
```
In this example there are two different steps which could result in an error:
opening the file and reading its contents. If an error is returned then our
imaginary caller doesn't know which step the error occurred at. Using our rule
we can infer that it would be good to annotate at _which_ step the error is
from, so the caller is able to have a fuller picture of what went wrong.
Note that each annotation does _not_ include the file path which was passed into
the function. The caller already knows this path, so an error being returned
back which reiterates the path is unnecessary.
**Example 3: Annotating which argument was involved**
In this example we will read two files using our function from example 2, and
return the concatenation of their contents as a string.
```go
func concatFiles(pathA, pathB string) (string, error) {
contentsA, err := readFile(pathA)
if err != nil {
return "", fmt.Errorf("reading contents of %q: %w", pathA, err)
}
contentsB, err := readFile(pathB)
if err != nil {
return "", fmt.Errorf("reading contents of %q: %w", pathB, err)
}
return contentsA + contentsB, nil
}
```
Like in example 2 we annotate each error, but instead of annotating the action
we annotate which file path was involved in each error. This is because if we
simply annotated with the string `reading contents` like before it wouldn't be
clear to the caller _which_ file's contents couldn't be read. Therefore we
include which path the error is relevant to.
**Example 4: Layering**
In this example we will show how using this rule habitually results in easy to
read errors which contain all relevant information surrounding the error. Our
example reads one file, the "full" file, using our `readFile` function from
example 2. It then reads the concatenation of two files, the "split" files,
using our `concatFiles` function from example 3. It finally determines if the
two strings are equal:
```go
func verifySplits(fullFilePath, splitFilePathA, splitFilePathB string) error {
fullContents, err := readFile(fullFilePath)
if err != nil {
return fmt.Errorf("reading contents of full file: %w", err)
}
splitContents, err := concatFiles(splitFilePathA, splitFilePathB)
if err != nil {
return fmt.Errorf("reading concatenation of split files: %w", err)
}
if fullContents != splitContents {
return errors.New("full file's contents do not match the split files' contents")
}
return nil
}
```
As previously, we don't annotate the file paths for the different possible
errors, but instead say _which_ files were involved. The caller already knows
the paths; there's no need to reiterate them if there's another way of referring
to them.
Let's see what our errors actually look like! We run our new function using the
following:
```go
err := verifySplits("full.txt", "splitA.txt", "splitB.txt")
fmt.Println(err)
```
Let's say `full.txt` doesn't exist, we'll get the following error:
```
reading contents of full file: opening file: open full.txt: no such file or directory
```
The error is simple, and gives you everything you need to understand what went
wrong: while attempting to read the full file, during the opening of that file,
our code found that there was no such file. In fact, the error returned by
`os.Open` contains the name of the file, which goes against our rule, but it's
the standard library so what can ya do?
Now, let's say that `splitA.txt` doesn't exist, then we'll get this error:
```
reading concatenation of split files: reading contents of "splitA.txt": opening file: open splitA.txt: no such file or directory
```
Now we did include the file path here, and so the standard library's failure to
follow our rule is causing us some repetition. But overall, within the parts of
the error we have control over, the error is concise and gives you everything
you need to know what happened.
## Exceptions
As with all rules, there are certainly exceptions. The primary one I've found is
that certain helper functions can benefit from bending this rule a bit. For
example, if there is a helper function which is called to verify some kind of
user input in many places, it can be helpful to include that input value within
the error returned from the helper function:
```go
func verifyInput(str string) error {
if err := check(str); err != nil {
return fmt.Errorf("input %q was bad: %w", str, err)
}
return nil
}
```
`str` is known to the caller so, according to our rule, we don't need to include
it in the error. But if you're going to end up wrapping the error returned from
`verifyInput` with `str` at every call site anyway it can be convenient to save
some energy and break the rule. It's a trade-off, convenience in exchange for
consistency.
Another exception might be made with regards to stack traces.
In the set of examples given above I tended to annotate each error being
returned with a description of where in the function the error was being
returned from. If your language automatically includes some kind of stack trace
with every error, and if you find that you are generally able to reconcile that
stack trace with actual code, then it may be that annotating each error site is
unnecessary, except when annotating actual runtime values (e.g. an input
string).
As in all things with programming, there are no hard rules; everything is up to
interpretation and the specific use-case being worked on. That said, I hope what
I've laid out here will prove generally useful to you, in whatever way you might
try to use it.

View File

@ -1,172 +0,0 @@
---
title: >-
F-Mail
description: >-
If email didn't suck.
---
I'm down a blog post, so I'm going to try to make up some time on this one.
Email is probably the oldest web technology which is widely recognized by the
general public. It predates WWW by about 15 years, and is fundamental to the way
we use the internet.
It also really fucking sucks.
## Thought Exercise
Let's invent email all over again, for fun. We can take the good things from the
existing email paradigm, and replace the bad. Let's not worry about marketshare
and adoption strategies and all that annoying stuff either; after all, I need to
finish this post in like.... 20 minutes... tops.
This new email will be called fmail.
The basic idea of email is solid. It's mail, on the internet. We all understand
mail. You have a mailing address, I want to send you a thing. I pay someone else
to take my thing to you, and they have some mechanism for finding you just based
on your address.
We're good so far. Let's get into the weeds.
## Addresses
Email addresses are... ok. There's a name and a domain. If you were sending a
physical package to a house with multiple residents you would include the name
of the recipient on the package, in addition to the address. With email the
domain part of the email corresponds to the house address, and the username
corresponds to the recipient's actual name.
In this aspect, however, physical mail has email beat. If the package has a
correct name it can often be routed directly to its intended recipient. But it
doesn't _have_ to have a correct name. In fact it can have no name. In those
cases the residents of the address figure out amongst themselves what to do with
it. Maybe it's obvious who it's for, maybe not. In any case it's possible to
resolve these issues.
Further, in physical mail the routing steps are declared right on the mail
container (box, envelope, etc). You can, generally, read the recipient address
from bottom to top to understand how to deliver it. Here's an example:
```
Homer
123 Fakie St
Springfield, IL 12345
USA
```
Understanding the steps is simple enough. The package first needs to get to the
United States of America, then to Springfield, then to Fakie St, then to house
123 on Fakie St, and finally to the resident named "Homer" at that house.
Let's incorporate these ideas into fmail, our new mythical internet mail system.
In fmail the address isn't an inflexible `name@domain`. Instead the address is
composed of a sequence of `>` separated strings, each denoting an intended hop
in the route. For example:
```
sick-domain.com>brian>phone
```
The sender only needs to know how to route to the first hop in order to do its
duty. In this case it's a simple domain lookup, which would tell it an IP to
send the fmail message to. From there the receiving server would need to know
what to do with `brian` as a piece of routing information. Maybe it knows, and
can send the message along. Maybe it doesn't, in which case the mail might go to
a "lost and found" directory, where anyone on the fmail server could claim it.
If the idea of a domain-wide "lost and found" sounds scary, consider that it
might not be so scary in a world where fmail servers are easy to self-host, and
so people actually do so. What would make it possible for fmail to be easy to
self-host?
## Spam
Spam has made both email and real mail almost unbearable. If I'm honest, it's
the daily chore of cleaning my two mail boxes that made me start thinking about
writing this post in the first place. With email the spam issue is particularly
egregious, because the entire email ecosystem, not just the experience of the
individual, is made worse by spam.
If you want to know why it's hard to run your email server, the answer is
"because spam exists". You need to block the spam destined for you server, you
need to ensure someone isn't going to hack your server and send spam from it,
you need to convince other email servers that you're one of the good ones and
won't send spam, you need to pray your ISP even allows you to have an email
server (because they don't want to be seen as enabling spam). There's actual
_laws_ about email spam.
The good news is, fmail has solved the spam problem completely.
In fmail, all messages are rejected by default. It's a whitelist based access
control, unlike email's blacklist based one where anyone can send you anything
and it's up to you to reject what you don't want.
How can this work? There's a couple different forms the whitelist can take, and
they all can work together in your fmail server's configuration.
The primary one would be to check for some kind of cryptographic signature on
the message, declaring who it's from. If the message is from a list of configured
"good senders" then it's kept. This would be for friends, family, coworkers,
etc... Those you expect to hear from frequently who you actually want to hear
from.
Building on this, each "good sender" could have a timeout associated with them,
if desired. This could be useful when signing up for a website which wants to
use fmail for authentication. You configure your fmail client (which of course
integrates nicely with a web browser to make this easy) to allow messages from
this sender only for a limited time, or only a limited number of messages from
them. This way the user can receive their fmail confirmation message, or
password reset or whatever, without being forever bothered by stupid marketing
emails.
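As a rough illustration of what such a per-sender configuration could look like, here's a small Go sketch; the struct fields and their semantics are hypothetical, just one way of encoding "good sender, plus an optional expiry or message limit":

```go
package main

import (
	"fmt"
	"time"
)

// Sender is a hypothetical whitelist entry for a single "good sender".
type Sender struct {
	PublicKey   string    // key the sender's messages must be signed with
	ExpiresAt   time.Time // zero value means the entry never expires
	MaxMessages int       // zero means no limit on accepted messages
}

// Allows reports whether a message received now, after `received` prior
// messages from this sender, should be let through.
func (s Sender) Allows(now time.Time, received int) bool {
	if !s.ExpiresAt.IsZero() && now.After(s.ExpiresAt) {
		return false
	}
	if s.MaxMessages > 0 && received >= s.MaxMessages {
		return false
	}
	return true
}

func main() {
	// e.g. allow a website's confirmation messages for one hour, 3 messages max
	site := Sender{ExpiresAt: time.Now().Add(time.Hour), MaxMessages: 3}
	fmt.Println(site.Allows(time.Now(), 0)) // true
	fmt.Println(site.Allows(time.Now(), 3)) // false, limit reached
}
```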
A secondary method of whitelisting might involve someone attaching some
cryptocurrency to their message as a peace offering of sorts. It could be as
simple as a private key or signed transaction which would allow the receiver, if
they receive the message, to keep the money. It would be up to the fmail client
to allow configuration of which cryptos are accepted and how much crypto is
required, as well as ensuring that the money is still available to be received.
Only if all these requirements are met is the message allowed to be seen by a
human, otherwise it's dropped.
There are probably other interesting mechanisms I haven't thought of. It would be
good for fmail servers to have a plugin system that allowed for extending
functionality like this as the users desire.
## Encryption
One thing email sorely lacks is end-to-end encryption. This is a difficult
problem for communication systems in general, because ultimately what it comes
down to is a hard requirement on a safe exchange of public keys, which requires
an existing trusted method of communication.
I don't think fmail needs to re-invent this wheel. We've already established
that users will have some mechanism for sharing public keys (for whitelisting),
so really what this comes down to is having good UI around key management from
the start, and the stubbornness to establish e2e messages as the norm.
What holds email back in this area isn't so much the lack of solutions (there
are many ways to do e2e encryption over email) but the need for supporting
plaintext emails out of concern for backwards compatibility, as well as the need
to support open mail boxes which can receive and send mail willy-nilly. If a
whitelist-based system is built from scratch with e2e messages always being the
default way of messaging others, and plaintext messages being something with big
scary warnings around it, I don't think there'd be an issue.
## That's fmail
That's it. There's not much to it, except you know... actually implementing it
(someone else do it, I don't have time).
There's a lot more that could be said about the email protocol and server/client
implementations themselves, but I think if one were to start from scratch on
fmail it would be enough to say this: there's a lot of good things to take from
email, and really what we need is to update the mindset around internet
messaging in general. We have almost 8 billion people on earth, a double-digit
percentage of them have internet access, and we need to give users better
mechanisms for ensuring their messages are received the way each one
individually wants them to be.
My dream of finishing this post in 20 minutes did not come to pass. It was more
like an hour. I'm getting faster though!

View File

@ -1,339 +0,0 @@
---
title: >-
Evaluation of Network Filesystems
description: >-
There can only be one.
series: nebula
tags: tech
---
It's been a while since I last updated my progress on what I've been lately
calling the "cryptic nebula" project. When I last left off I was working on
building the [mobile nebula][mobile_nebula] using [nix][nix]. For the moment
I've given up on that dream, as flutter and nix just _really_ don't get along,
and I don't want to get too distracted by problems that aren't critical to the
actual goal.
Instead I'd like to pursue the next critical component of the system, and
that's a shared filesystem. The use-case I'm ultimately trying to achieve is:
* All hosts communicate with each other via the nebula network.
* All hosts are personal machines owned by individuals, _not_ cloud VMs.
* A handful of hosts are always-on, or at least as always-on as can be achieved
in a home environment.
* All hosts are able to read/write to a shared filesystem, which is mounted via
FUSE (or some other mechanism, though I can't imagine what) on their computer.
* Top-level directories within the shared filesystem can be restricted, so
that only a certain person (or host) can read/write to them.
What I'm looking for is some kind of network filesystem, of which there are
_many_. This document will attempt to evaluate all relevant projects and come up
with the next steps. It may be that no project fits the bill perfectly, and that
I'm stuck either modifying an existing project to my needs or, if things are
looking really dire, starting a new project.
The ultimate use-case here is something like a self-hosted, distributed [keybase
filesystem](https://book.keybase.io/docs/files); somewhere where individuals in
the cluster can back up their personal projects, share files with each other,
and possibly even be used as the base layer for more complex applications on
top.
The individuals involved shouldn't have to deal with configuring their
distributed FS, either to read from it or add storage resources to it. Ideally
the FS process can be bundled together with the nebula process and run opaquely;
the user is just running their "cryptic nebula" process and everything else is
handled in the background.
## Low Pass Filter
There are some criteria for these projects that I'm not willing to compromise
on; these criteria will form a low pass filter which, hopefully, will narrow our
search appreciably.
The network filesystem used by the cryptic nebula must:
* Be able to operate over a nebula network (obviously).
* Be open-source. The license doesn't matter, as long as the code is available.
* Run on both Mac and Linux.
* Not require a third-party to function.
* Allow for a replication factor of 3.
* Support sharding of data (ie each host need not have the entire dataset).
* Allow for mounting a FUSE filesystem in any hosts' machine to interact with
the network filesystem.
* Not run in the JVM, or any other VM which is memory-greedy.
The last may come across as mean, but the reason for it is that I foresee the
network filesystem client running on users' personal laptops, which cannot be
assumed to have resources to spare.
## Rubric
Each criterion in the next set lies along a spectrum. Any project may meet one of
these criteria fully, partially, or not at all. For each criterion I assign a
point value according to how fully the project meets it, and then sum up the
points to give the project a final score (a trivial sketch of this tallying
follows the list below). The project with the highest final score is not
necessarily the winner, but this system should at least give some good
candidates for final consideration.
The criteria, and their associated points values, are:
* **Hackability**: is the source-code of the project approachable?
- 0: No
- 1: Kind of, and there's not much of a community.
- 2: Kind of, but there is an active community.
- 3: Yes
* **Documentation**: is the project well documented?
- 0: No docs.
- 1: Incomplete or out-of-date docs.
- 2: Very well documented.
* **Transience**: how does the system handle hosts appearing or disappearing?
- 0: Requires an automated system to be built to handle adding/removing
hosts.
- 1: Gracefully handled.
* **Priority**: is it possible to give certain hosts priority when choosing
which will host/replicate some piece of data?
- 0: No.
- 1: Yes.
* **Caching**: will hosts reading a file have that file cached locally for the
next reading (until the file is modified)?
- 0: No.
- 1: Yes.
* **Conflicts**: if two hosts update the same file at the same time, how is
that handled?
- 0: The file can no longer be updated.
- 1: One update clobbers the other, or both go through in an undefined
order.
- 2: One update is disallowed.
- 3: A copy of the file containing the "losing" update is created (ie: how
dropbox does it).
- 4: Strategy can be configured on the file/directory level.
* **Consistency**: how does the system handle a file being changed frequently?
- 0: File changes must be propagated before subsequent updates are allowed (fully consistent).
- 1: Files are snapshotted at some large-ish interval (eventually consistent).
- 2: File state (ie content hash, last modified, etc) is propagated
frequently but contents are only fully propagated once the file has
"settled" (eventually consistent with debounce).
* **POSIX**: how POSIX compliant is the mounted filesystem?
- 0: Only the most basic features are implemented.
- 1: Some extra features are implemented.
- 2: Fully POSIX compliant.
* **Scale**: how many hosts can be a part of the cluster?
- 0: A finite number.
- 1: A finite number of dedicated hosts, infinite ephemeral.
- 2: Infinite hosts.
* **Failure**: how does the system handle failures (network partitions, hosts
hanging, buggy client versions)?
- 0: Data loss.
- 1: Reads and writes are halted.
- 2: Reads are allowed but writes are halted.
- 3: System is partially read/write, except affected parts.
* **Limitations**: are there limits on how big files can be, or how big
directories can be?
- 0: Files are limited to below 1TB in size.
- 1: Directories are limited to below 100,000 files.
- 2: No limits.
* **Encryption**: how is data encrypted?
- 0: Not at all, DIY.
- 1: Encrypted at rest.
- 2: Per-user encryption.
* **Permissions**: how are modifications to data restricted?
- 0: Not at all.
- 1: Permissions are only superficially enforced.
- 2: Fully enforced user/group restrictions, complex patterns, and/or POSIX ACLs.
* **Administration**: how much administration is required for the system to
function?
- 0: Frequent.
- 1: Infrequent.
- 2: Essentially none.
* **Simplicity**: how understandable is the system as a whole?
- 0: Very complex.
- 1: Understandable with some study.
- 2: Very simple, easy to predict.
* **Visibility**: how much visibility is available into processes within the
system?
- 0: Total black box.
- 1: Basic logging.
- 2: CLI tooling.
- 3: Exportable metrics (e.g. prometheus).
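To make the scoring mechanics explicit, here's a trivial Go sketch of the tallying, using Ceph's scores from the first evaluation below:

```go
package main

import "fmt"

func main() {
	// Ceph's scores from the evaluation below, one entry per criterion.
	scores := map[string]int{
		"Hackability": 2, "Documentation": 2, "Transience": 0, "Priority": 1,
		"Caching": 1, "Conflicts": 1, "Consistency": 0, "POSIX": 2,
		"Scale": 2, "Failure": 3, "Limitations": 2, "Encryption": 0,
		"Permissions": 2, "Administration": 1, "Simplicity": 0, "Visibility": 3,
	}

	total := 0
	for _, score := range scores {
		total += score
	}
	fmt.Println("TOTAL:", total) // 22
}
```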
## Evaluations
With the rubric defined, let's start actually working through our options! There
are many, many different possibilities, so this may not be an exhaustive list.
### [Ceph](https://docs.ceph.com/en/latest/cephfs/index.html)
> The Ceph File System, or CephFS, is a POSIX-compliant file system built on
> top of Cephs distributed object store, RADOS. CephFS endeavors to provide a
> state-of-the-art, multi-use, highly available, and performant file store for
> a variety of applications, including traditional use-cases like shared home
> directories, HPC scratch space, and distributed workflow shared storage.
- Hackability: 2. Very active community, but it's C++.
- Documentation: 2. Hella docs, very daunting.
- Transience: 0. Adding hosts seems to require multiple configuration steps.
- Priority: 1. There is fine-tuning on a per-host basis.
- Caching: 1. Clients can cache both metadata and block data.
- Conflicts: 1. The FS behaves as much like a real FS as possible.
- Consistency: 0. System is CP.
- POSIX: 2. Fully POSIX compliant.
- Scale: 2. Cluster can grow without any real bounds.
- Failure: 3. There's no indication anywhere that Ceph goes into any kind of cluster-wide failure mode.
- Limitations: 2. There are performance considerations with large directories, but no hard limits.
- Encryption: 0. None to speak of.
- Permissions: 2. POSIX ACLs supported.
- Administration: 1. This is a guess, but Ceph seems to be self-healing in general while still needing hand-holding in certain situations (adding/removing nodes, etc.).
- Simplicity: 0. There are many moving pieces, as well as many different kinds of processes and entities.
- Visibility: 3. Lots of tooling to dig into the state of the cluster, as well as a prometheus module.
TOTAL: 22
#### Comments
Ceph has been recommended to me by a few people. It is clearly a very mature
project, though that maturity has brought with it a lot of complexity. A lot of
the complexity of Ceph seems to be rooted in its strong consistency guarantees,
which I'm confident it fulfills well, but are not really needed for the
use-case I'm interested in. I'd prefer a simpler, eventually consistent,
system. It's also not clear to me that Ceph would even perform very well in my
use-case as it seems to want an actual datacenter deployment, with beefy
hardware and hosts which are generally close together.
### [GlusterFS](https://docs.gluster.org/en/latest/)
> GlusterFS is a scalable network filesystem suitable for data-intensive tasks
> such as cloud storage and media streaming. GlusterFS is free and open source
> software and can utilize common off-the-shelf hardware.
- Hackability: 2. Mostly C code, but there is an active community.
- Documentation: 2. Good docs.
- Transience: 0. New nodes cannot add themselves to the pool.
- Priority: 0. Data is distributed based on consistent hashing algo, nothing else.
- Caching: 1. Docs mention client-side caching layer.
- Conflicts: 0. File becomes frozen, manual intervention is needed to save it.
- Consistency: 0. Gluster aims to be fully consistent.
- POSIX: 2. Fully POSIX compliant.
- Scale: 2. No apparent limits.
- Failure: 3. Clients determine on their own whether or not they have a quorum for a particular sub-volume.
- Limitations: 2. Limited by the file system underlying each volume, I think.
- Encryption: 2. Encryption can be done on the volume level, each user could have a private volume.
- Permissions: 2. ACL checking is enforced on the server-side, but requires syncing of users and group membership across servers.
- Administration: 1. Beyond adding/removing nodes the system is fairly self-healing.
- Simplicity: 1. There's only one kind of server process, and the configuration of volumes is well documented and straightforward.
- Visibility: 3. Prometheus exporter available.
TOTAL: 23
#### Comments
GlusterFS was my initial choice when I did a brief survey of DFSs for this
use-case. However, after further digging into it I think it will suffer the
same ultimate problem as CephFS: too much consistency for a wide-area
application like I'm envisioning. The need for syncing user/groups across
machines as actual system users is also cumbersome enough to make it not a
great choice.
### [MooseFS](https://moosefs.com/)
> MooseFS is a Petabyte Open Source Network Distributed File System. It is easy
> to deploy and maintain, highly reliable, fault tolerant, highly performing,
> easily scalable and POSIX compliant.
>
> MooseFS spreads data over a number of commodity servers, which are visible to
> the user as one resource. For standard file operations MooseFS acts like
> ordinary Unix-like file system.
- Hackability: 2. All C code, pretty dense, but backed by a company.
- Documentation: 2. There's a giant PDF you can read through like a book. I
guess that's.... good?
- Transience: 0. Nodes must be added manually.
- Priority: 1. There's "Storage Classes".
- Caching: 1. Caching is done on the client, and there's some synchronization
with the master server around it.
- Conflicts: 1. Both update operations will go through.
- Consistency: 0. Afaict it's a fully consistent system, with a master server
being used to synchronize changes.
- POSIX: 2. Fully POSIX compliant.
- Scale: 2. Cluster can grow without any real bounds.
- Failure: 1. If the master server is unreachable then the client can't
function.
- Limitations: 2. Limits are very large, effectively no limit.
- Encryption: 0. Docs make no mention of encryption.
- Permissions: 1. Afaict permissions are done by the OS on the fuse mount.
- Administration: 1. It seems that if the topology is stable there shouldn't be
much going on.
- Simplicity: 0. There are many moving pieces, as well as many different kinds of processes and entities.
- Visibility: 2. Lots of cli tooling, no prometheus metrics that I could find.
TOTAL: 17
#### Comments
Overall MooseFS seems to me like a poor developer's Ceph. It can do exactly the
same things, but with less of a community around it. The sales pitch and
feature-gating also don't ingratiate it to me. The most damning "feature" is the
master metadata server, which acts as a SPOF and only sort of supports
replication (but not failover, unless you get Pro).
## Cutting Room Floor
The following projects were intended to be reviewed, but didn't make the cut for
various reasons.
* Tahoe-LAFS: The FUSE mount (which is actually an SFTP mount) doesn't support
mutable files.
* HekaFS: Doesn't appear to exist anymore(?)
* IPFS-cluster: Doesn't support sharding.
* MinFS: Seems to only work off S3, no longer maintained anyway.
* DRBD: Linux specific, no mac support.
* BeeGFS: No mac support (I don't think? I couldn't find any indication it
supports macs at any rate).
* NFS: No support for sharding the dataset.
## Conclusions
Going through the featuresets of all these different projects really helped me
focus in on how I actually expect this system to function, and a few things
stood out to me:
* Perfect consistency is not a goal, and is ultimately harmful for this
use-case. The FS needs to propagate changes relatively quickly, but if two
different hosts are updating the same file it's not necessary to synchronize
those updates like a local filesystem would; just let one changeset clobber
the other and let the outer application deal with coordination.
* Permissions are extremely important, and yet for all of these projects they
are generally an afterthought. In a distributed setting we can't rely on the OS
user/groups of a host to permission read/write access. Instead that must be
done primarily via e2e encryption.
* Transience is not something most of these projects expect, but is a hard
requirement of this use-case. In the long run we need something which can be
run on home hardware on home ISPs, which is not reliable at all. Hosts need to
be able to flit in and out of existence, and the cluster as a whole needs to
self-heal through that process.
In the end, it may be necessary to roll our own project for this, as I don't
think any of the existing distributed file systems are suitable for what's
needed.
[mobile_nebula]: https://github.com/cryptic-io/mobile_nebula
[nix]: https://nixos.org/manual/nix/stable/

View File

@ -1,436 +0,0 @@
---
title: >-
Ripple V2: A Better Game
description: >-
The sequel no one was waiting for!
tags: tech
series: ripple
---
<p>
<b>Movement:</b> Arrow keys or WASD<br/>
<b>Jump:</b> Space<br/>
<b>Goal:</b> Jump as many times as possible without touching a ripple!<br/>
<br/>
<b>Press Jump To Begin!</b>
</p>
_Who can make the muddy water clear?<br/>
Let it be still, and it will gradually become clear._
<canvas id="canvas"
style="border:1px dashed #AAA"
tabindex=0>
Your browser doesn't support canvas. At this point in the world that's actually
pretty cool, well done!
</canvas>
<button onclick="reset()">(R)eset</button>
<span style="font-size: 2rem; margin-left: 1rem;">Score:
<span style="font-weight: bold" id="score">0</span>
</span>
<script type="text/javascript">
const palette = [
"#264653",
"#2A9D8F",
"#E9C46A",
"#F4A261",
"#E76F51",
];
const width = 800;
const height = 600;
function hypotenuse(w, h) {
return Math.sqrt(Math.pow(w, 2) + Math.pow(h, 2));
}
let canvas = document.getElementById("canvas");
canvas.width = width;
canvas.height = height;
const whitelistedKeys = {
"ArrowUp": {},
"KeyW": {map: "ArrowUp"},
"ArrowLeft": {},
"KeyA": {map: "ArrowLeft"},
"ArrowRight": {},
"KeyD": {map: "ArrowRight"},
"ArrowDown": {},
"KeyS": {map: "ArrowDown"},
"Space": {},
"KeyR": {},
};
let keyboard = {};
canvas.addEventListener('keydown', (event) => {
let keyInfo = whitelistedKeys[event.code];
if (!keyInfo) return;
let code = event.code;
if (keyInfo.map) code = keyInfo.map;
event.preventDefault();
keyboard[code] = true;
});
canvas.addEventListener('keyup', (event) => {
let keyInfo = whitelistedKeys[event.code];
if (!keyInfo) return;
let code = event.code;
if (keyInfo.map) code = keyInfo.map;
event.preventDefault();
delete keyboard[code];
});
const C = 700; // scales the overall speed of the radius
const T = 500; // on which tick the radius change becomes linear
/*
f(x) = sqrt(C*x) when x < T
(C/(2*sqrt(CT)))(x-T) + sqrt(CT) when x >= T
radius(x) = f(x) + playerRadius;
*/
const F1 = (x) => Math.sqrt(C*x);
const F2C1 = C / (2 * Math.sqrt(C*T));
const F2C2 = Math.sqrt(C * T);
const F2 = (x) => (F2C1 * (x - T)) + F2C2;
const F = (x) => {
if (x < T) return F1(x);
return F2(x);
};
class Ripple {
constructor(id, currTick, x, y, bounces, color) {
this.id = id;
this.tick = currTick;
this.x = x;
this.y = y;
this.thickness = Math.pow(bounces+1, 1.25);
this.color = color;
this.winner = false;
this.maxRadius = hypotenuse(x, y);
this.maxRadius = Math.max(this.maxRadius, hypotenuse(width-x, y));
this.maxRadius = Math.max(this.maxRadius, hypotenuse(x, height-y));
this.maxRadius = Math.max(this.maxRadius, hypotenuse(width-x, height-y));
}
radius(currTick) {
const x = currTick - this.tick;
return F(x) + playerRadius;
}
draw(ctx, currTick) {
ctx.beginPath();
ctx.arc(this.x, this.y, this.radius(currTick), 0, Math.PI * 2, false);
ctx.closePath();
ctx.lineWidth = this.thickness;
ctx.strokeStyle = this.winner ? "#FF0000" : this.color;
ctx.stroke();
}
canGC(currTick) {
return this.radius(currTick) > this.maxRadius;
}
}
const playerRadius = 10;
const playerMoveAccel = 0.5;
const playerMoveDecel = 0.7;
const playerMaxMoveSpeed = 4;
const playerJumpSpeed = 0.08;
const playerMaxHeight = 1;
const playerGravity = 0.01;
class Player{
constructor(x, y, color) {
this.x = x;
this.y = y;
this.z = 0;
this.xVelocity = 0;
this.yVelocity = 0;
this.zVelocity = 0;
this.color = color;
this.falling = false;
this.lastJumpHeight = 0;
this.loser = false;
}
act() {
if (keyboard["ArrowUp"]) {
this.yVelocity = Math.max(-playerMaxMoveSpeed, this.yVelocity - playerMoveAccel);
} else if (keyboard["ArrowDown"]) {
this.yVelocity = Math.min(playerMaxMoveSpeed, this.yVelocity + playerMoveAccel);
} else if (this.yVelocity > 0) {
this.yVelocity = Math.max(0, this.yVelocity - playerMoveDecel);
} else if (this.yVelocity < 0) {
this.yVelocity = Math.min(0, this.yVelocity + playerMoveDecel);
}
this.y += this.yVelocity;
this.y = Math.max(0+playerRadius, this.y);
this.y = Math.min(height-playerRadius, this.y);
if (keyboard["ArrowLeft"]) {
this.xVelocity = Math.max(-playerMaxMoveSpeed, this.xVelocity - playerMoveAccel);
} else if (keyboard["ArrowRight"]) {
this.xVelocity = Math.min(playerMaxMoveSpeed, this.xVelocity + playerMoveAccel);
} else if (this.xVelocity > 0) {
this.xVelocity = Math.max(0, this.xVelocity - playerMoveDecel);
} else if (this.xVelocity < 0) {
this.xVelocity = Math.min(0, this.xVelocity + playerMoveDecel);
}
this.x += this.xVelocity;
this.x = Math.max(0+playerRadius, this.x);
this.x = Math.min(width-playerRadius, this.x);
let jumpHeld = keyboard["Space"];
if (jumpHeld && !this.falling && this.z < playerMaxHeight) {
this.lastJumpHeight = 0;
this.zVelocity = playerJumpSpeed;
} else {
this.zVelocity = Math.max(-playerJumpSpeed, this.zVelocity - playerGravity);
this.falling = this.z > 0;
}
let prevZ = this.z;
this.z = Math.max(0, this.z + this.zVelocity);
this.lastJumpHeight = Math.max(this.z, this.lastJumpHeight);
}
draw(ctx) {
let y = this.y - (this.z * 40);
let radius = playerRadius * (this.z+1)
// draw main
ctx.beginPath();
ctx.arc(this.x, y, radius, 0, Math.PI * 2, false);
ctx.closePath();
ctx.lineWidth = 0;
ctx.fillStyle = this.color;
ctx.fill();
if (this.loser) {
ctx.strokeStyle = '#FF0000';
ctx.lineWidth = 2;
ctx.stroke();
}
// draw shadow, if in the air
if (this.z > 0) {
let radius = Math.max(0, playerRadius * (1.2 - this.z));
ctx.beginPath();
ctx.arc(this.x, this.y, radius, 0, Math.PI * 2, false);
ctx.closePath();
ctx.lineWidth = 0;
ctx.fillStyle = this.color+"33";
ctx.fill();
}
}
}
class Game {
constructor(canvas, scoreEl) {
this.currTick = 0;
this.player = new Player(width/2, height/2, palette[0]);
this.state = 'play';
this.score = 0;
this.scoreEl = scoreEl;
this.canvas = canvas;
this.ctx = canvas.getContext("2d");
this.ripples = [];
this.nextRippleID = 0;
}
shouldReset() {
return keyboard['KeyR'];
}
newRippleID() {
let id = this.nextRippleID;
this.nextRippleID++;
return id;
}
// newRipple initializes and stores a new ripple at the given coordinates, as
// well as all sub-ripples which make up the initial ripple's reflections.
newRipple(x, y, bounces, color) {
color = color ? color : palette[Math.floor(Math.random() * palette.length)];
let ripplePos = [];
let nextRipples = [];
let addRipple = (x, y) => {
for (let i in ripplePos) {
if (ripplePos[i][0] == x && ripplePos[i][1] == y) return;
}
let ripple = new Ripple(this.newRippleID(), this.currTick, x, y, bounces, color);
nextRipples.push(ripple);
ripplePos.push([x, y]);
this.ripples.push(ripple);
};
// add initial ripple, after this we deal with the sub-ripples.
addRipple(x, y);
while (bounces > 0) {
bounces--;
let prevRipples = nextRipples;
nextRipples = [];
for (let i in prevRipples) {
let prevX = prevRipples[i].x;
let prevY = prevRipples[i].y;
addRipple(prevX, -prevY);
addRipple(-prevX, prevY);
addRipple((2*this.canvas.width)-prevX, prevY);
addRipple(prevX, (2*this.canvas.height)-prevY);
}
}
}
// playerRipplesState returns a mapping of rippleID -> boolean, where each
// boolean indicates the ripple's relation to the player at the moment. true
// indicates the player is outside the ripple, false indicates the player is
// within the ripple.
playerRipplesState() {
let state = {};
for (let i in this.ripples) {
let ripple = this.ripples[i];
let rippleRadius = ripple.radius(this.currTick);
let hs = Math.pow(ripple.x-this.player.x, 2) + Math.pow(ripple.y-this.player.y, 2);
state[ripple.id] = hs > Math.pow(rippleRadius + playerRadius, 2);
}
return state;
}
playerHasJumpedOverRipple(prev, curr) {
for (const rippleID in prev) {
if (!curr.hasOwnProperty(rippleID)) continue;
if (curr[rippleID] != prev[rippleID]) return true;
}
return false;
}
update() {
if (this.state != 'play') return;
let playerPrevZ = this.player.z;
this.player.act();
if (playerPrevZ == 0 && this.player.z > 0) {
// player has jumped
this.prevPlayerRipplesState = this.playerRipplesState();
} else if (playerPrevZ > 0 && this.player.z == 0) {
// player has landed, don't produce a ripple unless there are no
// existing ripples or the player jumped over an existing one.
if (
this.ripples.length == 0 ||
this.playerHasJumpedOverRipple(
this.prevPlayerRipplesState,
this.playerRipplesState()
)
) {
let bounces = Math.floor((this.player.lastJumpHeight*1.8)+1);
console.log("spawning ripple with bounces:", bounces);
this.newRipple(this.player.x, this.player.y, bounces);
this.score += bounces;
}
}
if (this.player.z == 0) {
for (let i in this.ripples) {
let ripple = this.ripples[i];
let rippleRadius = ripple.radius(this.currTick);
if (rippleRadius < playerRadius * 1.5) continue;
let hs = Math.pow(ripple.x-this.player.x, 2) + Math.pow(ripple.y-this.player.y, 2);
if (hs > Math.pow(rippleRadius + playerRadius, 2)) {
continue;
} else if (hs <= Math.pow(rippleRadius - playerRadius, 2)) {
continue;
} else {
console.log("game over", ripple);
ripple.winner = true;
this.player.loser = true;
this.state = 'gameOver';
// deliberately don't break here, in case multiple ripples hit
// the player on the same frame
}
}
}
this.ripples = this.ripples.filter(ripple => !ripple.canGC(this.currTick));
this.currTick++;
}
draw() {
this.ctx.clearRect(0, 0, this.canvas.width, this.canvas.height);
this.ripples.forEach(ripple => ripple.draw(this.ctx, this.currTick));
this.player.draw(this.ctx)
this.scoreEl.innerHTML = this.score;
}
}
const requestAnimationFrame =
window.requestAnimationFrame ||
window.mozRequestAnimationFrame ||
window.webkitRequestAnimationFrame ||
window.msRequestAnimationFrame;
let game = new Game(canvas, document.getElementById("score"));
function reset() {
game = new Game(canvas, document.getElementById("score"));
}
function nextFrame() {
if (game.shouldReset()) reset();
game.update()
game.draw()
requestAnimationFrame(nextFrame);
}
requestAnimationFrame(nextFrame);
canvas.focus();
</script>
## Changelog
There have been two major changes to the mechanics of the game since the previous
version:
* A new ripple is created _only_ if there are no ripples on the field already,
or if the player has jumped over an existing ripple.
* The score is increased only if a ripple is created, and is increased by the
number of bounces off the wall that ripple will have. Put another way, the
score is increased based on how high you jump.
Other small changes include:
* Ripple growth rate has been modified. It's now harder for a player to run into
the ripple they just created.
* Ripple thickness indicates how many bounces are left in the ripple. This was
the case previously, but it's been made more obvious.
* Small performance improvements.

View File

@ -1,249 +0,0 @@
---
title: >-
Composing Processes Into a Static Binary With Nix
description: >-
Goodbye, docker-compose!
tags: tech
---
It's pretty common to want to use a project that requires multiple processes to
be running. For example, a small web api which uses some database to
store data in, or a networking utility which has some monitoring process which
can be run alongside it.
In these cases it's extremely helpful to be able to compose these disparate
processes together into a single process. From the user's perspective it's much
nicer to only have to manage one process (even if it has hidden child
processes). From a dev's perspective the alternatives are: finding libraries in
the same language which do the disparate tasks and composing them into the same
process via import, or (if such libraries don't exist, which is likely)
rewriting the functionality of all processes into a new, monolithic project
which does everything; a huge waste of effort!
## docker-compose
A tool I've used before for process composition is
[docker-compose][docker-compose]. While it works well for composition, it
suffers from the same issues docker in general suffers from: annoying networking
quirks, a questionable security model, and the need to run the docker daemon.
While these issues are generally surmountable for a developer or sysadmin, they
are not suitable for a general-purpose project which will be shipped to average
users.
## nix-bundle
Enter [nix-bundle][nix-bundle]. This tool will take any [nix][nix] derivation
and construct a single static binary out of it, a la [AppImage][appimage].
Combined with a process management tool like [circus][circus], nix-bundle
becomes a very useful tool for composing processes together!
To demonstrate this, we'll be looking at putting together a project I wrote
called [markov][markov], a simple REST API for building [markov
chains][markov-chain] which is written in [go][golang] and backed by
[redis][redis].
## Step 1: Building Individual Components
Step one is to get [markov][markov] and its dependencies into a state where it
can be run with [nix][nix]. Doing this is fairly simple, we merely use the
`buildGoModule` function:
```
pkgs.buildGoModule {
pname = "markov";
version = "618b666484566de71f2d59114d011ff4621cf375";
src = pkgs.fetchFromGitHub {
owner = "mediocregopher";
repo = "markov";
rev = "618b666484566de71f2d59114d011ff4621cf375";
sha256 = "1sx9dr1q3vr3q8nyx3965x6259iyl85591vx815g1xacygv4i4fg";
};
vendorSha256 = "048wygrmv26fsnypsp6vxf89z3j0gs9f1w4i63khx7h134yxhbc6";
}
```
This expression results in a derivation which places the markov binary at
`bin/markov`.
The other component we need to run markov is [redis][redis], which conveniently
is already packaged in nixpkgs as `pkgs.redis`.
## Step 2: Composing Using Circus
[Circus][circus] can be configured to run multiple processes at the same time.
It will collect the stdout/stderr logs of these processes and combine them into
a single stream, or write them to log files. If any processes fail circus will
automatically restart them. It has a simple configuration and is, overall, a
great tool for a simple project like this.
Circus also comes pre-packed in nixpkgs, so we don't need to do anything to
actually build it. We only need to configure it. To do this we'll write a bash
script which generates the configuration on-the-fly, and then runs the process
with that configuration.
This script is going to act as the "frontend" for our eventual static binary;
the user will pass in configuration parameters to this script, and this script
will translate those into the appropriate configuration for all sub-processes
(markov, redis, circus). For this demo we won't go nuts with the configuration,
we'll just expose the following:
* `MARKOV_LISTEN_ADDR`: Address REST API will listen on (defaults to
`localhost:8000`).
* `MARKOV_TIMEOUT`: Expiration time of each link of the chain (defaults to 720
hours).
* `MARKOV_DATA_DIR`: Directory where data will be stored (defaults to current
working directory).
The bash script will take these params in as environment variables. The nix
expression to generate the bash script, which we'll call our entrypoint script,
will look like this (assumes that the expression to generate `bin/markov`,
defined above, is set to the `markov` variable):
```
pkgs.writeScriptBin "markov" ''
#!${pkgs.stdenv.shell}
# On every run we create new, temporary, configuration files for redis and
# circus. To do this we create a new config directory.
markovCfgDir=$(${pkgs.coreutils}/bin/mktemp -d)
echo "generating configuration to $markovCfgDir"
cat >$markovCfgDir/redis.conf <<EOF
save ""
dir "''${MARKOV_DATA_DIR:-$(pwd)}"
appendonly yes
appendfilename "markov.data"
EOF
cat >$markovCfgDir/circus.ini <<EOF
[circus]
[watcher:markov]
cmd = ${markov}/bin/markov \
-listenAddr ''${MARKOV_LISTEN_ADDR:-localhost:8000} \
-timeout ''${MARKOV_TIMEOUT:-720}
numprocesses = 1
[watcher:redis]
cmd = ${pkgs.redis}/bin/redis-server $markovCfgDir/redis.conf
numprocesses = 1
EOF
exec ${pkgs.circus}/bin/circusd $markovCfgDir/circus.ini
'';
```
By `nix-build`ing this expression we end up with a derivation with
`bin/markov`, and running that should result in the following output:
```
generating configuration to markov.VLMPwqY
2021-04-22 09:27:56 circus[181906] [INFO] Starting master on pid 181906
2021-04-22 09:27:56 circus[181906] [INFO] Arbiter now waiting for commands
2021-04-22 09:27:56 circus[181906] [INFO] markov started
2021-04-22 09:27:56 circus[181906] [INFO] redis started
181923:C 22 Apr 2021 09:27:56.063 # oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo
181923:C 22 Apr 2021 09:27:56.063 # Redis version=6.0.6, bits=64, commit=00000000, modified=0, pid=181923, just started
181923:C 22 Apr 2021 09:27:56.063 # Configuration loaded
...
```
The `markov` server process doesn't have many logs, unfortunately, but redis'
logs at least work well, and doing a `curl localhost:8000` results in the
response from the `markov` server.
At this point our processes are composed using circus, let's now bundle it all
into a single static binary!
## Step 3: nix-bundle
The next step is to run [nix-bundle][nix-bundle] on the entrypoint expression,
and nix-bundle will compile all dependencies (including markov, redis, and
circus) into a single archive file, and make that file executable. When the
archive is executed it will run our entrypoint script directly.
Getting nix-bundle is very easy, just use nix-shell!
```
nix-shell -p nix-bundle
```
This will open a shell where the `nix-bundle` binary is available on your path.
From there just run the following to construct the binary (this assumes that the
nix code described so far is stored in `markov.nix`, the full source of which
will be linked to at the end of this post):
```
nix-bundle '((import ./markov.nix) {}).entrypoint' '/bin/markov'
```
The resulting binary is called `markov`, and is 89MB. The size is a bit jarring,
considering the simplicity of the functionality, but it could probably be
trimmed by using a different process manager than circus (which requires
bundling an entire python runtime into the binary).
Running the binary directly as `./markov` produces the same result as when we
ran the entrypoint script earlier. Success! We have bundled multiple existing
processes into a single, opaque, static binary. Installation of this binary is
now as easy as copying it to any linux machine and running it.
## Bonus Step: nix'ing nix-bundle
Installing and running [nix-bundle][nix-bundle] manually is _fine_, but it'd be even better if
that was defined as part of our nix setup as well. That way any new person
wouldn't have to worry about that step, and still get the same deterministic
output from the build.
Unfortunately, we can't actually run `nix-bundle` from within a nix build
derivation, as it requires access to the nix store and that can't be done (or at
least I'm not on that level yet). So instead we'll have to settle for defining
the `nix-bundle` binary in nix and then using a `Makefile` to call it.
Defining a `nix-bundle` expression is easy enough:
```
nixBundleSrc = pkgs.fetchFromGitHub {
owner = "matthewbauer";
repo = "nix-bundle";
rev = "8e396533ef8f3e8a769037476824d668409b4a74";
sha256 = "1lrq0990p07av42xz203w64abv2rz9xd8jrzxyvzzwj7vjj7qwyw";
};
nixBundle = (import "${nixBundleSrc}/release.nix") {
nixpkgs' = pkgs;
};
```
Then the Makefile:
```make
bundle:
nix-build markov.nix -A nixBundle
./result/bin/nix-bundle '((import ./markov.nix) {}).entrypoint' '/bin/markov'
```
Now all a developer needs to do to rebuild the project is run `make` within the
directory, while also having nix set up. The result will be a deterministically
built, static binary, encompassing multiple processes which will all work
together behind the scenes. This static binary can be copied to any linux
machine and run there without any further installation steps.
How neat is that!
The final source files used for this project can be found below:
* [markov.nix](/assets/markov/markov.nix.html)
* [Makefile](/assets/markov/Makefile.html)
[nix]: https://nixos.org/manual/nix/stable/
[nix-bundle]: https://github.com/matthewbauer/nix-bundle
[docker-compose]: https://docs.docker.com/compose/
[appimage]: https://appimage.org/
[circus]: https://circus.readthedocs.io/en/latest/
[markov]: https://github.com/mediocregopher/markov
[markov-chain]: https://en.wikipedia.org/wiki/Markov_chain
[golang]: https://golang.org/
[redis]: https://redis.io/

View File

@ -1,223 +0,0 @@
---
title: >-
Loops in Ginger
description: >-
Bringing it back around.
series: ginger
tags: tech
---
In previous posts in this series I went over the general idea of the ginger
programming language, and some of its properties. To recap:
* Ginger is a programming language whose syntax defines a directed graph, in the
same way that a LISP language's syntax defines nested lists.
* Graph edges indicate an operation, while nodes indicate a value.
* The special values `in` and `out` are used when interpreting a graph as a
function.
* A special node type, the tuple, is defined as being a node whose value is an
ordered set of input edges.
* Another special node type, the fork, is the complement to the tuple. A fork is
defined as being a node whose value is an ordered set of output edges.
* The special `if` operation accepts a 2-tuple, the first value being some state
value and the second being a boolean. The `if` operation expects to be directed
towards a 2-fork. If the boolean is true then the top output edge of the fork
is taken, otherwise the bottom is taken. The state value is what's passed to
the taken edge.
There were some other detail rules but I don't remember them off the top of my
head.
## Loops
Today I'd like to go over my ideas for how loops would work in ginger. With
loops established ginger would officially be a Turing complete language and,
given time and energy, real work could actually begin on it.
As with conditionals I'll start by establishing a base example. Let's say we'd
like to define an operation which prints out numbers from 0 up to `n`, where `n`
is given as an argument. In go this would look like:
```go
func printRange(n int) {
for i := 0; i < n; i++ {
fmt.Println(i)
}
}
```
With that established, let's start looking at different patterns.
## Goto
In the olden days the primary looping construct was `goto`, which essentially
teleports the program counter (aka instruction pointer) to another place in the
program. Pretty much any other looping construct can be derived from
`goto` and some kind of conditional, so it's a good starting place when
considering loops in ginger.
```
(in -println-> } -incr-> out) -> println-incr
0 -> } -> } -if-> { -> out
in -> } -eq-> } { -> } -upd-> } -+
^ 0 -> } |
| println-incr -> } |
| |
+--------------------------------+
```
(Note: the `upd` operation is used here for convenience. It takes in three
arguments: A tuple, an index, and an operation. It applies the operation to the
tuple element at the given index, and returns a new tuple with that index set to
the value returned.)
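Since ginger doesn't exist yet, here's a rough Go analogue of what `upd` is described as doing, purely to pin down those semantics; the function is hypothetical, with a slice standing in for a tuple:

```go
package main

import "fmt"

// upd applies op to the element of tup at index i and returns a new tuple
// with that index replaced by op's result; the original tuple is untouched.
func upd(tup []int, i int, op func(int) int) []int {
	out := make([]int, len(tup))
	copy(out, tup)
	out[i] = op(tup[i])
	return out
}

func main() {
	incr := func(x int) int { return x + 1 }
	fmt.Println(upd([]int{0, 5}, 0, incr)) // [1 5]
}
```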
Here `goto` is performed using a literal arrow going from right to left. It's
ugly and hard to write, and would only be more so the more possible gotos an
operation has.
It also complicates our graphs in a significant way: up till now ginger graphs
have always been directed _acyclic_ graphs (DAGs), but by introducing this
construct we allow graphs to be cyclic. It's not immediately clear to me
what the consequences of this will be, but I'm sure they will be great. If
nothing else it will make the compiler much more complex, as each value can no
longer be defined in terms of its input edge, as that edge might resolve back to
the value itself.
While conceptually sound, I think this strategy fails the practicality test. We
can do better.
## While
The `while` construct is the basic looping primitive of iterative languages
(some call it `for`, but they're just lying to themselves).
Try as I might, I can't come up with a way to make `while` work with ginger.
`while` ultimately relies on scoped variables being updated in place to
function, while ginger is based on the concept of pipelining a set of values
through a series of operations. From the point of view of the programmer these
operations are essentially immutable, so the requirement of a variable which can
be updated in place cannot be met.
## Recur
This pattern is based on how many functional languages, for example erlang,
handle looping. Rather than introducing new primitives around looping, these
languages instead ensure that tail calls are properly optimized and use those
instead. So loops are implemented as recursive function calls.
For ginger to do this it would make sense to introduce a new special value,
`recur`, which could be used alongside `in` and `out` within operations. When
the execution path hits a `recur` then it gets teleported back to the `in`
value, with the input to `recur` now being the output from `in`. Usage of it
would look like:
```
(
(in -println-> } -incr-> out) -> println-incr
in -> } -if-> { -> out
in -eq-> } { -> } -upd-> } -> recur
0 -> }
println-incr -> }
) -> inner-op
0 -> } -inner-op-> out
in -> }
```
This looks pretty similar to the `goto` example overall, but with the major
difference that the looping body had to be wrapped into an inner operation. The
reason for this is that the outer operation only takes in one argument, `n`, but
the loop actually needs two pieces of state to function: `n` and the current
value. So the inner operation loops over these two pieces of state, and the
outer operation supplies `n` and an initial iteration value (`0`) to that inner
operation.
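For comparison, here's roughly the same shape written as Go, with the loop body as an explicitly recursive inner function carrying both pieces of state; this is only an analogy for what the `recur` graph above describes, not actual ginger semantics:

```go
package main

import "fmt"

func printRange(n int) {
	// inner carries both pieces of state: the counter i and the bound n.
	var inner func(i, n int)
	inner = func(i, n int) {
		if i == n {
			return
		}
		fmt.Println(i)
		inner(i+1, n) // the "recur" step, with the counter updated
	}
	// the outer operation supplies n and the initial iteration value (0).
	inner(0, n)
}

func main() {
	printRange(5)
}
```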
This seems cumbersome on the surface, but what other languages do (such as
erlang, which is the one I'm most familiar with) is to provide built-in macros
on top of this primitive which make it more pleasant to use. These include
function polymorphism and a more familiar `for` construct. With a decent macro
capability ginger could do the same.
The benefits here are that the graphs remain acyclic, and the syntax has not
been made more cumbersome. It follows conventions established by other
languages, and ensures the language will be capable of tail-recursion.
## Map/Reduce
Another functional strategy which is useful is that of the map/reduce power
couple. The `map` operation takes a sequence of values and an operation, and
returns a sequence of the same length where the operation has been applied to
each value in the original sequence individually. The `reduce` operation is more
complicated (and not necessary for our example), but it's essentially a
mechanism to turn a sequence of values into a single value.
For our example we only need `map`, plus one more helper operation: `range`.
`range` takes a number `n` and returns a sequence of numbers starting at `0` and
ending at `n-1`. Our print example now looks like:
```
in -range-> } -map-> out
println -> }
```
Very simple! Map/reduce is a well established pattern and is probably the
best way to construct functional programs. However, the question remains whether
these are the best _primitives_ for looping, and I don't believe they are. Both
`map` and `reduce` can be derived from conditional and looping primitives like
`if` and `recur`, and they can't do some things that those primitives can. While
I expect one of the first things which will be done in ginger is to define `map`
and `reduce` in terms of `if` and a looping primitive, and use them generously
throughout the code, I think the fact that they can be defined in terms of
lower-level primitives indicates that they aren't the right looping primitives
for ginger.
## Conclusion
Unlike with the conditionals posts, where I started out not really knowing what
I wanted to do with conditionals, I more or less knew where this post was going
from the beginning. `recur` is, in my mind, the best primitive for looping in
ginger. It provides the flexibility to be extended to any use-case, while not
complicating the structure of the language. While possibly cumbersome to
implement directly, `recur` can be used as a primitive to construct more
convenient looping operations like `map` and `reduce`.
As a final treat (lucky you!), here's `map` defined using `if` and `recur`:
```
(
in -0-> mapped-seq
in -1-> orig-seq
in -2-> op
mapped-seq -len-> i
mapped-seq -> } -if-> { -> out
orig-seq -len-> } -eq-> } { -> } -append-> } -> recur
i -> } } }
} }
orig-seq -i-> } -op-> } }
}
orig-seq -> }
op -> }
) -> inner-map
() -> } -inner-map-> out
in -0-> }
in -1-> }
```
The next step for ginger is going to be writing an actual implementation of the
graph structure in some other language (let's be honest, it'll be in go). After
that we'll need a syntax definition which can be used to encode/decode that
structure, and from there we can start actually implementing the language!

View File

@ -1,349 +0,0 @@
---
title: >-
NFTs
description: >-
Some thoughts about.
tags: tech crypto
---
NFT stands for "non-fungible token". The "token" part refers to an NFT being a
token whose ownership is recorded on a blockchain. Pretty much all
cryptocurrencies, from bitcoin to your favorite shitcoin, could be called tokens
in this sense. Each token has exactly one owner, and ownership of the token can
be transferred from one wallet to another via a transaction on the blockchain.
What sets an NFT apart from a cryptocurrency is the "non-fungible" part.
Cryptocurrency tokens are fungible; one bitcoin is the same as any other bitcoin
(according to the protocol, at least), in the same way as one US dollar holds as
much value as any other US dollar. Fungibility is the property of two units of
something being exactly interchangeable.
NFTs are _not_ fungible. One is not the same as any other. Each has some piece
of data attached to it, and each is recorded separately on a blockchain as an
individual token. You can think of an NFT as a unique cryptocurrency which has a
supply of 1 and can't be divided.
Depending on the protocol used to produce an NFT, the data attached to it might
be completely independent of its identity, even. It may be possible to produce
two NFTs with the exact same data attached to them (again, depending on the
protocol used), but even so those two NFTs will be independent and not
interchangeable.
## FUD
Before getting into why NFTs are interesting, I want to first address some
common criticism I see of them online (aka, in my twitter feed). The most
common, and unfortunately least legitimate, criticism has to do with the
environmental impact of NFTs. While the impact on energy usage and the
environment when talking about bitcoin is a topic worth going into, bitcoin
doesn't support hosting NFTs and therefore that topic is irrelevant here.
Most NFTs are hosted on ethereum, which does have a comparable energy footprint
to bitcoin (it's somewhat less than half, according to the internet). _However_,
ethereum is taking actual, concrete steps towards changing its consensus
mechanism from proof-of-work (PoW) to proof-of-stake (PoS), which will cut the
energy usage of the network down to essentially nothing. The rollout plan for
Ethereum PoS covers the next couple of years, and after that we don't really
have to worry about the energy usage of NFTs any longer.
The other big criticism I hear is about the value and nature of art and what the
impact of NFTs are in that area. I'm going to talk more about this in this post,
but, simply put, I don't think that the value and nature of art are immutable,
any more than the form of art is immutable. Perhaps NFTs _will_ change art, but
change isn't bad in itself, and furthermore I don't think they will actually
change it all that much. People will still produce art, it's only the
distribution mechanism that might change.
## Real, Useful, Boring Things
Most of the coverage around NFTs has to do with using them to represent
collectibles and art. I'd like to start by talking about other use-cases, those
where NFTs are actually "useful" (in the dull, practical sense).
Each NFT can carry some piece of data along with it. This data can be anything,
but for a practical use-case it needs to be something which indicates ownership
of some internet good. It _cannot_ be the good itself. For example, an NFT which
contains an image does not really convey the ownership of that image; anyone can
copy the image data and own that image as well (intellectual property rights be
damned!).
A real use-case for NFTs which I'm already, if accidentally, taking advantage
of, is domain name registration. I am the proud owner of the
[mediocregopher.eth][ens] domain name (the `.eth` TLD is not yet in wide usage
in browsers, but one day!). The domain name's ownership is indicated by an NFT:
whoever holds that NFT, which I currently do, has the right to change all
information attached to the `mediocregopher.eth` domain. If I want to sell the
domain all I need to do is sell the NFT, which can be done via an ethereum
transaction.
Domain names work well for NFTs because knowing the data attached to the NFT
doesn't actually do anything for you. It's the actual _ownership_ of the NFT
which unlocks value. And I think this is the key rule for where to look to apply
NFTs to practical use-cases: the ownership of the NFT has to unlock some
functionality, not the data attached to it. The functionality has to be digital
in nature, as well, as anything related to the physical world is not as easily
guaranteed.
I haven't thought of many further practical use-cases of NFTs, but we're still
in early stages and I'm sure more will come up. In any case, the practical stuff
is boring, let's talk about art.
[ens]: https://nfton.me/nft/0x57f1887a8bf19b14fc0df6fd9b2acc9af147ea85/7558304748055753202351203668187280010336475031529884349040105080320604507070
## Art, Memes, and All Wonderful Things
For many the most baffling aspect of NFTs is their use as collectibles. Indeed,
their use as collectibles is their _primary_ use right now, even though these
collectibles procur no practical value for their owner; at best they are
speculative goods, small gambles, and at worst just a complete waste of money.
How can this be?
The curmudgeons of the world would have you believe that money is only worth
spending on goods which offer practical value. If the good is neither consumable
in a way which meets a basic need, nor produces other goods of further value,
then it is worthless. Obviously NFTs fall into the "worthless" category.
Unfortunately for them, the curmudgeons don't live in reality. People spend
their money on stupid, pointless shit all the time. I'm prepared to argue that
people almost exclusively spend their money on stupid, pointless shit. The
monetary value of a good has very little to do with its ability to meet a basic
necessity or its ability to produce value (whatever that even really means), and
more to do with how owning the shiny thing or doing the fun thing makes us
stupid monkeys very happy (for a time).
Rather than bemoan NFTs, and our simple irrationality which makes them
desirable, let's embrace them as a new tool for expressing our irrationality to
the world, a tool which we have yet to fully explore.
### A Moment Captured
It's 1857 and Jean-François Millet reveals to the world what would become one of
his best known works: _The Gleaners_.
{% include image.html dir="nfts" file="gleaners.jpg" width=5354 %}
The painting depicts three peasants gleaning a field, the bulk of their harvest
already stacked high in the background. The [wikipedia entry][gleaners] has this
to say about the painting's eventual final sale:
> In 1889, the painting, then owned by banker Ferdinand Bischoffsheim, sold for
> 300,000 francs at auction. The buyer remained anonymous, but rumours were
> that the painting was coveted by an American buyer. It was announced less than
> a week later that Champagne maker Jeanne-Alexandrine Louise Pommery had
> acquired the piece, which silenced gossip on her supposed financial issues
> after leaving her grapes on the vines weeks longer than her competitors.
I think we can all breathe a sigh of relief for Jeanne-Alexandrine.
I'd like to talk about _why_ this painting was worth 300k francs, and really
what makes art valuable at all (aside from the money laundering and tax evasion
that high-value art enables). Millet didn't merely take a picture using paints
and canvas, an exact replica of what his eyes could see. It's doubtful this
scene ever played out in reality, exactly as depicted, at all! It existed only
within Millet himself.
In _The Gleaners_ Millet captured far more than an image: the image itself
conveys the struggle of a humble life, the joy of the harvest, the history of
the french peasantry (and therefore the other french societal classes as well),
the vastness of the world compared to our little selves, and surely many other
things, each dependent on the viewer. The image conveys emotions, and most
importantly it conveys emotions captured at a particular moment, a moment which
no longer exists and will never exist again. The capturing of such a moment by
an artist capable of doing it some justice, so others can experience it to any
significant degree far into the future, is a rare event.
Access to that rare moment is what is being purchased for 300k francs. We refer
to the painting as the "original", but really the painting is only the
first-hand reproduction of the moment, which is the true original, and proximity
to the true original is what is being purchased. All other reproductions must be
based on this first-hand one (be they photographs or painted copies), and are
therefore second and third-hand.
Consider the value of a concert ticket; it is based on how much in demand
the performance is, how close to the performance the seating section is, and how
many seats in that section there are. When one purchases the "original" _The
Gleaners_, one is purchasing a front-row ticket to a world-class performance at
a venue with only one seat. That is why it was worth 300k francs.
I have one final thing to say here and then I'll move on to the topic at hand:
the history of the work compounds its value as well. _The Gleaners_ conveys an
emotion, but knowing the critical reaction of the french elite at its first
unveiling can add to that emotion.
Again, from the [wiki entry][gleaners]:
> Millet's The Gleaners was also not perceived well due to its large size, 33
> inches by 44 inches, or 84 by 112 centimetres. This was large for a painting
> depicting labor. Normally this size of a canvas was reserved for religious or
> mythological style paintings. Millet's work did not depict anything
> religiously affiliated, nor was there any reference to any mythological
> beliefs. The painting illustrated a realistic view of poverty and the working
> class. One critic commented that "his three gleaners have gigantic
> pretensions, they pose as the Three Fates of Poverty...their ugliness and
> their grossness unrelieved."
Now scroll back up and see if you don't now have more affinity for the painting
than before you knew that. If so, then the face value just went up, just a
little bit.
[gleaners]: https://en.wikipedia.org/wiki/The_Gleaners
### The Value of an NFT
With this acknowledgement of _why_ people desire art, we can understand why they
would want an NFT depicting an artwork.
A few days ago an NFT of this image sold for almost $500k:
{% include image.html dir="nfts" file="disaster-girl.jpg" width=2560 %}
Most of the internet knows this image as _Disaster Girl_, a meme which has been
around since time immemorial (from the internet's perspective, anyway, in
reality it was taken in 2007). The moment captured is funny, the girl in the
image smiling as if she had set the fire which blazes in the background. But, as
with _The Gleaners_, the image itself isn't everything. The countless usages of
the image, the original and all of its remixes, all passed around as memes on
the internet for the past 14 years, have all worked to add to the image's
demand. _Disaster Girl_ is no longer just a funny picture or a versatile meme
format, it's a piece of human history and nostalgia.
Unlike physical paintings, however, internet memes are eminently copyable. If
they weren't they could hardly function as memes! We can only have one
"original" _The Gleaners_, but anyone with a computer can have an exact, perfect
copy of the original _Disaster Girl_, such that there's no true original. But if
I were to put up an NFT of _Disaster Girl_ for sale, I wouldn't get a damned
penny for it (probably). Why was that version apparently worth $500k?
The reason is that the seller is the girl in the image herself, now 21 years old
and in college. I have no particular connection to _Disaster Girl_, so buying an
NFT from me would be like buying a print of _The Gleaners_ off some rando in the
street; just a shallow copy, worth only the material it's printed on plus some
labor, and nothing more. But when Disaster Girl herself sells the NFT, then the
buyer is actually part of the moment, they are entering themselves into the
history of this meme that the whole world has taken a part in for the last 14
years! $500k isn't so unreasonable in that light.
### Property on the Internet
I don't make it a secret that I consider "intellectual property" to be a giant
fucking scam that the world has unfortunately bought into. Data, be it a
physical book or a digital file, is essentially free to copy, and so any price
placed on the copying or sharing of knowledge is purely artificial. But we don't
have an alternate mechanism for paying producers of knowledge and art, and so we
continue to treat data as property even though it bears absolutely no
resemblance to anything of the kind.
Disaster Girl has not, to my knowledge, asserted any property rights on the
image of herself. Doing so in any real sense, beyond going after a handful of
high-value targets who might settle a lawsuit, is simply not a feasible option.
Instead, by selling an NFT, Disaster Girl has been compensated for her labor
(meager as it was) in a way which was proportional to its impact on the world,
all without the invocation of the law. A great success!
Actually, the labor was performed by Disaster Girl's father, who took the
original image and sent it into a photo contest or something. What would have
happened if the NFT was sold in his name? I imagine that it would not have sold
for nearly as much. This makes sense to me, even if it does not make sense from
a purely economical point of view. Disaster Girl's father did the work in the
moment, but being a notable figure to the general public is its own kind of
labor, and it's likely that his daughter has borne the larger burden over time.
The same logic applies to why we pay our movie stars absurd amounts even while
the crew makes a "normal" wage.
Should the father not then get compensated at all? I think he should, and I
think he could! If he were to produce an NFT of his own, of the exact same
image, it would also fetch a decent price. Probably not 6 figures, possibly not
even 4, but considering the actual contribution he made (taking a picture and
uploading it), I think the price would be fair. How many photographers get paid
anything at all for their off-hand pictures of family outings?
And this is the point I'd like to make: an NFT's price, like that of all art,
is tied to its distance from the moment captured: the closer, the more
valuable. The beauty is that this distance is purely subjective; it is judged
not by rules set down in law by fallible lawyers, but instead by the public at
large. It is, in essence, a democratization of intellectual property disputes.
If multiple people claim to have produced a single work, let them all produce
an NFT, and the market will decide what each one's work is worth.
Will the market ever be wrong? Certainly. But will it distribute the worth more
incorrectly than our current system, where artists must sell their rights to a
large publisher in order to see a meager profit, while the publisher rakes in
the vastly larger share? I sincerely doubt it.
### Content Creation
Another interesting mechanism of NFTs is that some platforms (e.g.
[Rarible][rarible]) allow the seller to attach a royalty percentage to the NFT
being sold. When this is done, the original seller will receive some
percentage of all future sales of that NFT.
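As a rough illustration of the mechanics (the 10% royalty and the sale prices
below are made-up numbers, not any particular platform's terms), consider this
sketch in JavaScript:
```
// hypothetical numbers: an NFT minted with a 10% royalty is resold three
// times as the associated work gains popularity.
const royalty = 0.10;
const resalePrices = [1, 4, 25]; // in ETH, purely illustrative
const creatorCut = resalePrices
  .map((price) => price * royalty)
  .reduce((sum, cut) => sum + cut, 0);
console.log(creatorCut); // 3 ETH to the original seller, on top of the first sale
```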
I think this opens some interesting possibilities for content creators. Normally
a content creator would need to sell ads or subscriptions in order to profit
from their content, but if they instead/in addition sell NFTs associated with
their content (e.g. one per episode of their youtube show) they can add another
revenue stream. As their show, or whatever, begins to take off, older NFTs
become more valuable, and the content creator can take advantage of that new
increased value via royalties set on the NFTs.
There are some further interesting side-effects that come from using NFTs in this
way. If a creator releases a work, and a corresponding NFT for that work, their
incentive is no longer to gate access to that work (as it would be in our
current IP system) or burden the work with advertisements and pleas for
subscriptions/donations. There's an entirely new goalpost for the creator:
actual value to others.
The value of the NFT is based entirely and arbitrarily on others' feelings
towards the original work, and so it is in the creator's interest to increase
the visibility and virality of the work. We can expect a creator who has sold an
NFT for a work, with royalties attached, to actively ensure there is as
little gatekeeping around the work as possible, and to create work which is
completely platform-agnostic and available absolutely everywhere. Releasing a
work as public-domain could even become a norm, should NFTs prove more
profitable than other revenue streams.
### Shill Gang
While the content creator's relationship with their platform(s) will change
drastically, I also expect that their relationship with their fans, or really
their fans' relationship with the creator's work, will change even more. Fans
are no longer passive viewers, they can have an actual investment in a work's
success. Where fans currently shill their favorite show or game or whatever out
of love, they can now also do it for personal profit. I think this is the worst
possible externality of NFTs I've encountered: internet fandoms becoming orders
of magnitude more fierce and unbearable as they relentlessly shill their
investments to the world at large.
There is one good thing to come out of this new fan/content relationship though,
and that's the fan's role in distribution and preservation of work. Since fans
now have a financial incentive to see a work persist into the future, they will
take it upon themselves to ensure that the works won't accidentally fall off the
face of the internet (as things often do). This can be difficult currently since
work is often tied down with IP restrictions, but, as we've established, work
which uses NFTs for revenue is incentivized to _not_ tie itself down in any way,
so fans will have much more freedom in this respect.
[rarible]: https://rarible.com/
### Art
It seems unlikely to me that art will cease to be created, or cease to be
valuable. The human creative instinct comes prior to money, and we have always
created art regardless of economic concerns. It's true that the nature of our
art changes according to economics (don't forget to hit that "Follow" button at
the top!), but if anything I think NFTs can change our art for the better. Our
work can be more to the point, more accessible, and less encumbered by legal
bullshit.
## Fin
That crypto cat is out of the bag, at this point, and I doubt if there's
anything that can put it back. The world has never before had the tools that
cryptocurrency and related technologies (like NFTs) offer, and our lives will
surely change as new uses of these tools make themselves apparent. I've tried to
extrapolate some uses and changes that could come out of NFTs here, but I have
no doubt that I've missed or mistaken some.
It's my hope that this post has at least offered some food-for-thought related
to NFTs, beyond the endless hot takes and hype that can be found on social
media, and that the reader can now have a bigger picture view of NFTs and where
they might take us as a society, should we embrace them.

View File

@ -1,442 +0,0 @@
---
title: >-
Ripple V3
description: >-
We're getting there!
tags: tech
series: ripple
---
<p>
<b>Movement:</b> Arrow keys or WASD<br/>
<b>Jump:</b> Space<br/>
<b>Goal:</b> Jump as many times as possible without touching a ripple!<br/>
<br/>
<b>Press Jump To Begin!</b>
</p>
_Who can make the muddy water clear?<br/>
Let it be still, and it will gradually become clear._
<canvas id="canvas"
style="border:1px dashed #AAA"
tabindex=0>
Your browser doesn't support canvas. At this point in the world that's actually
pretty cool, well done!
</canvas>
<button onclick="reset()">(R)eset</button>
<span style="font-size: 2rem; margin-left: 1rem;">Score:
<span style="font-weight: bold" id="score">0</span>
</span>
<script type="text/javascript">
const palette = [
"#264653",
"#2A9D8F",
"#E9C46A",
"#F4A261",
"#E76F51",
];
const width = 800;
const height = 600;
function hypotenuse(w, h) {
return Math.sqrt(Math.pow(w, 2) + Math.pow(h, 2));
}
let canvas = document.getElementById("canvas");
canvas.width = width;
canvas.height = height;
const whitelistedKeys = {
"ArrowUp": {},
"KeyW": {map: "ArrowUp"},
"ArrowLeft": {},
"KeyA": {map: "ArrowLeft"},
"ArrowRight": {},
"KeyD": {map: "ArrowRight"},
"ArrowDown": {},
"KeyS": {map: "ArrowDown"},
"Space": {},
"KeyR": {},
};
let keyboard = {};
canvas.addEventListener('keydown', (event) => {
let keyInfo = whitelistedKeys[event.code];
if (!keyInfo) return;
let code = event.code;
if (keyInfo.map) code = keyInfo.map;
event.preventDefault();
keyboard[code] = true;
});
canvas.addEventListener('keyup', (event) => {
let keyInfo = whitelistedKeys[event.code];
if (!keyInfo) return;
let code = event.code;
if (keyInfo.map) code = keyInfo.map;
event.preventDefault();
delete keyboard[code];
});
const C = 700; // scales the overall speed of the radius
const T = 500; // on which tick the radius change becomes linear
/*
f(x) = sqrt(C*x) when x < T
(C/(2*sqrt(CT)))(x-T) + sqrt(CT) when x >= T
radius(x) = f(x) + playerRadius;
*/
const F1 = (x) => Math.sqrt(C*x);
const F2C1 = C / (2 * Math.sqrt(C*T));
const F2C2 = Math.sqrt(C * T);
const F2 = (x) => (F2C1 * (x - T)) + F2C2;
const F = (x) => {
if (x < T) return F1(x);
return F2(x);
};
class Ripple {
constructor(id, currTick, x, y, bounces, color) {
this.id = id;
this.tick = currTick;
this.x = x;
this.y = y;
this.thickness = Math.pow(bounces+1, 1.25);
this.color = color;
this.winner = false;
this.maxRadius = hypotenuse(x, y);
this.maxRadius = Math.max(this.maxRadius, hypotenuse(width-x, y));
this.maxRadius = Math.max(this.maxRadius, hypotenuse(x, height-y));
this.maxRadius = Math.max(this.maxRadius, hypotenuse(width-x, height-y));
}
radius(currTick) {
const x = currTick - this.tick;
return F(x) + playerRadius;
}
draw(ctx, currTick) {
ctx.beginPath();
ctx.arc(this.x, this.y, this.radius(currTick), 0, Math.PI * 2, false);
ctx.closePath();
ctx.lineWidth = this.thickness;
ctx.strokeStyle = this.winner ? "#FF0000" : this.color;
ctx.stroke();
}
canGC(currTick) {
return this.radius(currTick) > this.maxRadius;
}
}
const playerRadius = 10;
const playerMoveAccel = 0.5;
const playerMoveDecel = 0.7;
const playerMaxMoveSpeed = 4;
const playerJumpSpeed = 0.08;
const playerMaxHeight = 1;
const playerGravity = 0.01;
class Player{
constructor(x, y, color) {
this.x = x;
this.y = y;
this.z = 0;
this.xVelocity = 0;
this.yVelocity = 0;
this.zVelocity = 0;
this.color = color;
this.falling = false;
this.lastJumpHeight = 0;
this.loser = false;
}
act() {
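// accelerate/decelerate along each axis based on held keys, wrap the x/y
// position around the screen edges, and handle jumping and gravity along z.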
if (keyboard["ArrowUp"]) {
this.yVelocity = Math.max(-playerMaxMoveSpeed, this.yVelocity - playerMoveAccel);
} else if (keyboard["ArrowDown"]) {
this.yVelocity = Math.min(playerMaxMoveSpeed, this.yVelocity + playerMoveAccel);
} else if (this.yVelocity > 0) {
this.yVelocity = Math.max(0, this.yVelocity - playerMoveDecel);
} else if (this.yVelocity < 0) {
this.yVelocity = Math.min(0, this.yVelocity + playerMoveDecel);
}
this.y += this.yVelocity;
if (this.y < 0) this.y += height;
else if (this.y > height) this.y -= height;
if (keyboard["ArrowLeft"]) {
this.xVelocity = Math.max(-playerMaxMoveSpeed, this.xVelocity - playerMoveAccel);
} else if (keyboard["ArrowRight"]) {
this.xVelocity = Math.min(playerMaxMoveSpeed, this.xVelocity + playerMoveAccel);
} else if (this.xVelocity > 0) {
this.xVelocity = Math.max(0, this.xVelocity - playerMoveDecel);
} else if (this.xVelocity < 0) {
this.xVelocity = Math.min(0, this.xVelocity + playerMoveDecel);
}
this.x += this.xVelocity;
if (this.x < 0) this.x += width;
else if (this.x > width) this.x -= width;
let jumpHeld = keyboard["Space"];
if (jumpHeld && !this.falling && this.z < playerMaxHeight) {
this.lastJumpHeight = 0;
this.zVelocity = playerJumpSpeed;
} else {
this.zVelocity = Math.max(-playerJumpSpeed, this.zVelocity - playerGravity);
this.falling = this.z > 0;
}
let prevZ = this.z;
this.z = Math.max(0, this.z + this.zVelocity);
this.lastJumpHeight = Math.max(this.z, this.lastJumpHeight);
}
drawAt(ctx, atX, atY) {
const y = atY - (this.z * 40);
const radius = playerRadius * (this.z+1)
// draw main
ctx.beginPath();
ctx.arc(atX, y, radius, 0, Math.PI * 2, false);
ctx.closePath();
ctx.lineWidth = 0;
ctx.fillStyle = this.color;
ctx.fill();
if (this.loser) {
ctx.strokeStyle = '#FF0000';
ctx.lineWidth = 2;
ctx.stroke();
}
// draw shadow, if in the air
if (this.z > 0) {
let radius = Math.max(0, playerRadius * (1.2 - this.z));
ctx.beginPath();
ctx.arc(atX, atY, radius, 0, Math.PI * 2, false);
ctx.closePath();
ctx.lineWidth = 0;
ctx.fillStyle = this.color+"33";
ctx.fill();
}
}
draw(ctx) {
[-1, 0, 1].forEach((wScalar) => {
const w = width * wScalar;
[-1, 0, 1].forEach((hScalar) => {
const h = height * hScalar;
this.drawAt(ctx, this.x+w, this.y+h);
})
})
}
}
class Game {
constructor(canvas, scoreEl) {
this.currTick = 0;
this.player = new Player(width/2, height/2, palette[0]);
this.state = 'play';
this.score = 0;
this.scoreEl = scoreEl;
this.canvas = canvas;
this.ctx = canvas.getContext("2d");
this.ripples = [];
this.nextRippleID = 0;
}
shouldReset() {
return keyboard['KeyR'];
}
newRippleID() {
let id = this.nextRippleID;
this.nextRippleID++;
return id;
}
// newRipple initializes and stores a new ripple at the given coordinates, as
// well as all sub-ripples which make up the initial ripple's reflections.
newRipple(x, y, bounces, color) {
color = color ? color : palette[Math.floor(Math.random() * palette.length)];
let ripplePos = [];
let nextRipples = [];
let addRipple = (x, y) => {
for (let i in ripplePos) {
if (ripplePos[i][0] == x && ripplePos[i][1] == y) return;
}
let ripple = new Ripple(this.newRippleID(), this.currTick, x, y, bounces, color);
nextRipples.push(ripple);
ripplePos.push([x, y]);
this.ripples.push(ripple);
};
// add initial ripple, after this we deal with the sub-ripples.
addRipple(x, y);
while (bounces > 0) {
bounces--;
let prevRipples = nextRipples;
nextRipples = [];
for (let i in prevRipples) {
let prevX = prevRipples[i].x;
let prevY = prevRipples[i].y;
[-1, 0, 1].forEach((wScalar) => {
const w = this.canvas.width * wScalar;
[-1, 0, 1].forEach((hScalar) => {
const h = this.canvas.height * hScalar;
addRipple(prevX + w, prevY + h);
})
})
}
}
}
// playerRipplesState returns a mapping of rippleID -> boolean, where each
// boolean indicates the ripple's relation to the player at the moment. true
// indicates the player is outside the ripple, false indicates the player is
// within the ripple.
playerRipplesState() {
let state = {};
for (let i in this.ripples) {
let ripple = this.ripples[i];
let rippleRadius = ripple.radius(this.currTick);
let hs = Math.pow(ripple.x-this.player.x, 2) + Math.pow(ripple.y-this.player.y, 2);
state[ripple.id] = hs > Math.pow(rippleRadius + playerRadius, 2);
}
return state;
}
playerHasJumpedOverRipple(prev, curr) {
for (const rippleID in prev) {
if (!curr.hasOwnProperty(rippleID)) continue;
if (curr[rippleID] != prev[rippleID]) return true;
}
return false;
}
update() {
if (this.state != 'play') return;
let playerPrevZ = this.player.z;
this.player.act();
if (playerPrevZ == 0 && this.player.z > 0) {
// player has jumped
this.prevPlayerRipplesState = this.playerRipplesState();
} else if (playerPrevZ > 0 && this.player.z == 0) {
// player has landed, don't produce a ripple unless there are no
// existing ripples or the player jumped over an existing one.
if (
this.ripples.length == 0 ||
this.playerHasJumpedOverRipple(
this.prevPlayerRipplesState,
this.playerRipplesState()
)
) {
//let bounces = Math.floor((this.player.lastJumpHeight*1.8)+1);
const bounces = 1;
console.log("spawning ripple with bounces:", bounces);
this.newRipple(this.player.x, this.player.y, bounces);
this.score += bounces;
}
}
if (this.player.z == 0) {
for (let i in this.ripples) {
let ripple = this.ripples[i];
let rippleRadius = ripple.radius(this.currTick);
if (rippleRadius < playerRadius * 1.5) continue;
let hs = Math.pow(ripple.x-this.player.x, 2) + Math.pow(ripple.y-this.player.y, 2);
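// hs is the squared distance from the ripple's center to the player. If the
// player is entirely outside the expanding ring, or entirely inside it, there
// is no collision; otherwise the ring overlaps the player and the game is over.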
if (hs > Math.pow(rippleRadius + playerRadius, 2)) {
continue;
} else if (hs <= Math.pow(rippleRadius - playerRadius, 2)) {
continue;
} else {
console.log("game over", ripple);
ripple.winner = true;
this.player.loser = true;
this.state = 'gameOver';
// deliberately don't break here, in case multiple ripples hit
// the player on the same frame
}
}
}
this.ripples = this.ripples.filter(ripple => !ripple.canGC(this.currTick));
this.currTick++;
}
draw() {
this.ctx.clearRect(0, 0, this.canvas.width, this.canvas.height);
this.ripples.forEach(ripple => ripple.draw(this.ctx, this.currTick));
this.player.draw(this.ctx)
this.scoreEl.innerHTML = this.score;
}
}
const requestAnimationFrame =
window.requestAnimationFrame ||
window.mozRequestAnimationFrame ||
window.webkitRequestAnimationFrame ||
window.msRequestAnimationFrame;
let game = new Game(canvas, document.getElementById("score"));
function reset() {
game = new Game(canvas, document.getElementById("score"));
}
function nextFrame() {
if (game.shouldReset()) reset();
game.update()
game.draw()
requestAnimationFrame(nextFrame);
}
requestAnimationFrame(nextFrame);
canvas.focus();
</script>
## Changelog
The previous version was too easy to break, even with the requirement of jumping
over a ripple to generate a new one and increase your score. This led to the
following major changes:
* The game now incorporates asteroid/pacman mechanics. Rather than bouncing off
walls, the player and ripples will instead come out the opposite wall they
travel through.
* Jump height no longer affects score or the "strength" of a ripple.

View File

@ -1,62 +0,0 @@
---
title: >-
New Year's Resolution Vibe Check
description: >-
The not-quite-halfway progress report.
---
It's been over five months since I started my New Year's resolution, where I
committed to writing 52 blog posts by the end of the year. This week I'm on the
first vacation I've been able to take since the pandemic started, and, for lack
of anything else to really write about, am doing an almost-halfway checkup on
the whole process.
Almost immediately into the process I wished I'd set my sights a bit lower. One
post a week is a pretty intense pace, it turns out. If I were to reset the
parameters of the resolution I would probably halve the requirements, down to
26 posts in the year. One concern would be that I would be more likely to forget
to do the bi-weekly post, whereas with the current system it's coupled with my
normal work rhythm and so stays more top of mind. But I think I'd have a much
easier time (perhaps even twice as easy!), so it might balance out.
My thought in the beginning was that I could write on Friday afternoons or
Monday mornings as a bookend to working, but what's generally happened is that I
write on weekends. During the week the energy to write something up just isn't
there; writing posts is a kind of work all on its own, and I can only bring
myself to do so much work everyday.
Lately it's been particularly difficult to pump out the posts. Obviously a large
component of this is that I quickly picked all the low hanging fruit that were
on my mind when I started this resolution, but an unexpected culprit has also
appeared: seasons. When I started the resolution it was still winter, and during
the cold months it's a lot easier to stay inside and work on a computer. As the
weather warms it's been harder to find time though, in between working on the
garden and going out and doing things with friends.
Figuring out what to write about is becoming more of a challenge as well
(obviously, given the topic of this post). Ideally I'd like to post about things
I'm _doing_, rather than just talking about some topic, and for the most part
I've kept to that. Constantly posting about ideas I have or opinions I
hold isn't really contributing any real work, unless the ideas or opinions are
really groundbreaking (they're not). If, on the other hand, I use the posts as a
kind of background motivation to get up and do something useful, so I can write
about what I did, then at least progress has been made on _something_.
The catch there is that I've now added an additional "thing" to do every week,
in addition to the weekly post, and, as previously covered, I just don't have
the time and energy for that. So some posts (ahem) are pretty much fluff, and I
barely have the energy for those! Yet another reason to wish I'd committed to 26
in the year, I suppose.
It hasn't been all added stress and strife though. Doing the posts _has_ caused
me to work on side projects more, and even better quite a few people I know have
given me really good feedback on what I've been doing, and some have even
started getting involved. So, in the sense of being a way to inform others about
the things I'm working on, the posts are a great success! And I've definitely
been more consistent about working on side projects this year.
I'll wrap this up and continue with my vacation. Summary: blog is more extra
work than expected, it's maybe worth it, but it would be more worth it if I
halved my pace. I'm not _going_ to halve my pace, because that's not how
resolutions work. The end.

View File

@ -1,213 +0,0 @@
---
title: >-
Visualization 4
description: >-
Birth, death, and colors.
series: viz
tags: tech art
---
<canvas id="canvas" style="padding-bottom: 2rem;" width="100%" height="100%"></canvas>
This visualization is a conglomeration of ideas from all the previous ones. On
each tick up to 20 new pixels are generated. The color of each new pixel is
based on the average color of its neighbors, plus some random drift.
Each pixel dies after a certain number of ticks, `N`. A pixel's life can be
extended by up to `8N` additional ticks: `N` for each of its (up to eight)
neighbors which is still alive.
This mechanism accounts for the strange behavior which is seen when the
visualization first loads, but also allows for more coherent clusters of pixels
to hold together as time goes on.
The asteroid rule is also in effect in this visualization, so the top row and
bottom row pixels are neighbors of each other, and similarly for the rightmost
and leftmost column pixels.
<script type="text/javascript">
function randn(n) {
return Math.floor(Math.random() * n);
}
const canvas = document.getElementById("canvas");
const parentWidth = canvas.parentElement.offsetWidth;
const rectSize = Math.floor(parentWidth /100 /2) *2; // must be even number
console.log("rectSize", rectSize);
canvas.width = parentWidth - rectSize - (parentWidth % rectSize);
canvas.height = canvas.width * 0.75;
canvas.height -= canvas.height % rectSize;
const ctx = canvas.getContext("2d");
const w = (canvas.width / rectSize) - 1;
const h = (canvas.height / rectSize) - 1;
class Elements {
constructor() {
this.els = {};
this.diff = {};
}
_normCoord(coord) {
if (typeof coord !== 'string') coord = JSON.stringify(coord);
return coord;
}
get(coord) {
return this.els[this._normCoord(coord)];
}
getAll() {
return Object.values(this.els);
}
set(coord, el) {
this.diff[this._normCoord(coord)] = {action: "set", coord: coord, ...el};
}
unset(coord) {
this.diff[this._normCoord(coord)] = {action: "unset"};
}
drawDiff(ctx) {
for (const coordStr in this.diff) {
const el = this.diff[coordStr];
const coord = JSON.parse(coordStr);
if (el.action == "set") {
ctx.fillStyle = `hsl(${el.h}, ${el.s}, ${el.l})`;
} else {
ctx.fillStyle = `#FFF`;
}
ctx.fillRect(coord[0]*rectSize, coord[1]*rectSize, rectSize, rectSize);
}
}
applyDiff() {
for (const coordStr in this.diff) {
const el = this.diff[coordStr];
delete this.diff[coordStr];
if (el.action == "set") {
delete el.action;
this.els[coordStr] = el;
} else {
delete this.els[coordStr];
}
}
}
}
const neighbors = [
[-1, -1], [0, -1], [1, -1],
[-1, 0], /* [0, 0], */ [1, 0],
[-1, 1], [0, 1], [1, 1],
];
function neighborsOf(coord) {
return neighbors.map((n) => {
let nX = coord[0]+n[0];
let nY = coord[1]+n[1];
nX = (nX + w) % w;
nY = (nY + h) % h;
return [nX, nY];
});
}
function randEmptyNeighboringCoord(els, coord) {
const neighbors = neighborsOf(coord).sort(() => Math.random() - 0.5);
for (const nCoord of neighbors) {
if (!els.get(nCoord)) return nCoord;
}
return null;
}
function neighboringElsOf(els, coord) {
const neighboringEls = [];
for (const nCoord of neighborsOf(coord)) {
const el = els.get(nCoord);
if (el) neighboringEls.push(el);
}
return neighboringEls;
}
const drift = 30;
function newEl(nEls) {
// for each h (which can be considered as degrees around a circle) break the h
// down into x and y vectors, and add those up separately. Then find the angle
// between those two resulting vectors, and that's the "average" h value.
let x = 0;
let y = 0;
nEls.forEach((el) => {
const hRad = el.h * Math.PI / 180;
x += Math.cos(hRad);
y += Math.sin(hRad);
});
let h = Math.atan2(y, x);
h = h / Math.PI * 180;
// apply some random drift, normalize
h += (Math.random() * drift * 2) - drift;
h = (h + 360) % 360;
return {
h: h,
s: "100%",
l: "50%",
};
}
const requestAnimationFrame =
window.requestAnimationFrame ||
window.mozRequestAnimationFrame ||
window.webkitRequestAnimationFrame ||
window.msRequestAnimationFrame;
const els = new Elements();
const maxNewElsPerTick = 20;
const deathThresh = 20;
let tick = 0;
function doTick() {
tick++;
const allEls = els.getAll().sort(() => Math.random() - 0.5);
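// if the grid is empty, seed it with a single random-hue el in the center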
if (allEls.length == 0) {
els.set([w/2, h/2], {
h: randn(360),
s: "100%",
l: "50%",
});
}
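// spawn up to maxNewElsPerTick new els, each in a random empty cell next to
// an existing el, with a hue derived from its neighbors' average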
let newEls = 0;
for (const el of allEls) {
const nCoord = randEmptyNeighboringCoord(els, el.coord);
if (!nCoord) continue; // el has no empty neighboring spots
const nEl = newEl(neighboringElsOf(els, nCoord))
nEl.tick = tick;
els.set(nCoord, nEl);
newEls++;
if (newEls >= maxNewElsPerTick) break;
}
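// an el dies once its age exceeds deathThresh, extended by a further
// deathThresh for each living neighbor it has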
for (const el of allEls) {
const nEls = neighboringElsOf(els, el.coord);
if (tick - el.tick - (nEls.length * deathThresh) >= deathThresh) els.unset(el.coord);
}
els.drawDiff(ctx);
els.applyDiff();
requestAnimationFrame(doTick);
}
requestAnimationFrame(doTick);
</script>

View File

@ -1,306 +0,0 @@
---
title: >-
Visualization 5
description: >-
Seeing double.
series: viz
tags: tech art
---
<script type="text/javascript">
function randn(n) {
return Math.floor(Math.random() * n);
}
const w = 100;
const h = 50;
const maxNewElsPerTick = 10;
const deathThresh = 10;
class Canvas {
constructor(canvasDOM) {
this.dom = canvasDOM;
this.ctx = canvasDOM.getContext("2d");
// expand canvas element's width to match parent.
this.dom.width = this.dom.parentElement.offsetWidth;
// rectSize must be an even number or the pixels don't display nicely.
this.rectSize = Math.floor(this.dom.width / w /2) * 2;
this.dom.width = w * this.rectSize;
this.dom.height = h * this.rectSize;
}
rectSize() {
return Math.floor(this.dom.width / w);
}
}
class Layer {
constructor(newEl) {
this.els = {};
this.diff = {};
this.newEl = newEl;
}
_normCoord(coord) {
if (typeof coord !== 'string') coord = JSON.stringify(coord);
return coord;
}
get(coord) {
return this.els[this._normCoord(coord)];
}
getAll() {
return Object.values(this.els);
}
set(coord, el) {
this.diff[this._normCoord(coord)] = {action: "set", coord: coord, ...el};
}
unset(coord) {
this.diff[this._normCoord(coord)] = {action: "unset"};
}
applyDiff() {
for (const coordStr in this.diff) {
const el = this.diff[coordStr];
delete this.diff[coordStr];
if (el.action == "set") {
delete el.action;
this.els[coordStr] = el;
} else {
delete this.els[coordStr];
}
}
}
update(state) {
// Apply diff from previous update first. The diff can't be applied last
// because it needs to be present during the draw phase.
this.applyDiff();
const allEls = this.getAll().sort(() => Math.random() - 0.5);
if (allEls.length == 0) {
this.set([w/2, h/2], this.newEl([]));
}
let newEls = 0;
for (const el of allEls) {
const nCoord = randEmptyNeighboringCoord(this, el.coord);
if (!nCoord) continue; // el has no empty neighboring spots
const nEl = this.newEl(neighboringElsOf(this, nCoord))
nEl.tick = state.tick;
this.set(nCoord, nEl);
newEls++;
if (newEls >= maxNewElsPerTick) break;
}
for (const el of allEls) {
const nEls = neighboringElsOf(this, el.coord);
if (state.tick - el.tick - (nEls.length * deathThresh) >= deathThresh) this.unset(el.coord);
}
}
draw(canvas) {
for (const coordStr in this.diff) {
const el = this.diff[coordStr];
const coord = JSON.parse(coordStr);
if (el.action == "set") {
canvas.ctx.fillStyle = `hsl(${el.h}, ${el.s}, ${el.l})`;
canvas.ctx.fillRect(
coord[0]*canvas.rectSize, coord[1]*canvas.rectSize,
canvas.rectSize, canvas.rectSize,
);
} else {
canvas.ctx.clearRect(
coord[0]*canvas.rectSize, coord[1]*canvas.rectSize,
canvas.rectSize, canvas.rectSize,
);
}
}
}
}
const neighbors = [
[-1, -1], [0, -1], [1, -1],
[-1, 0], /* [0, 0], */ [1, 0],
[-1, 1], [0, 1], [1, 1],
];
function neighborsOf(coord) {
return neighbors.map((n) => {
let nX = coord[0]+n[0];
let nY = coord[1]+n[1];
nX = (nX + w) % w;
nY = (nY + h) % h;
return [nX, nY];
});
}
function randEmptyNeighboringCoord(layer, coord) {
const neighbors = neighborsOf(coord).sort(() => Math.random() - 0.5);
for (const nCoord of neighbors) {
if (!layer.get(nCoord)) return nCoord;
}
return null;
}
function neighboringElsOf(layer, coord) {
const neighboringEls = [];
for (const nCoord of neighborsOf(coord)) {
const el = layer.get(nCoord);
if (el) neighboringEls.push(el);
}
return neighboringEls;
}
const drift = 30;
function mkNewEl(l) {
return (nEls) => {
const s = "100%";
if (nEls.length == 0) {
return {
h: randn(360),
s: s,
l: l,
};
}
// for each h (which can be considered as degrees around a circle) break the h
// down into x and y vectors, and add those up separately. Then find the angle
// between those two resulting vectors, and that's the "average" h value.
let x = 0;
let y = 0;
nEls.forEach((el) => {
const hRad = el.h * Math.PI / 180;
x += Math.cos(hRad);
y += Math.sin(hRad);
});
let h = Math.atan2(y, x);
h = h / Math.PI * 180;
// apply some random drift, normalize
h += (Math.random() * drift * 2) - drift;
h = (h + 360) % 360;
return {
h: h,
s: s,
l: l,
};
}
}
class Universe {
constructor(canvasesByClass, layersByClass) {
this.canvasesByClass = canvasesByClass;
this.state = {
tick: 0,
layers: layersByClass,
};
}
update() {
this.state.tick++;
Object.values(this.state.layers).forEach((layer) => layer.update(this.state));
}
draw() {
for (const layerName in this.state.layers) {
if (!this.canvasesByClass[layerName]) return;
this.canvasesByClass[layerName].forEach((canvas) => {
this.state.layers[layerName].draw(canvas);
});
}
}
}
</script>
<style>
.canvasContainer {
display: grid;
margin-bottom: 2rem;
text-align: center;
}
canvas {
border: 1px dashed #AAA;
width: 100%;
grid-area: 1/1/2/2;
}
</style>
<div class="canvasContainer">
<canvas class="layer1"></canvas>
<canvas class="layer2"></canvas>
</div>
<div class="row">
<div class="columns six">
<div class="canvasContainer"><canvas class="layer1"></canvas></div>
</div>
<div class="columns six">
<div class="canvasContainer"><canvas class="layer2"></canvas></div>
</div>
</div>
This visualization combines two distinct layers, each of them borrowing their
behavior from [Visualization 4][viz4]. Neither layer has any effect on the
other; one is merely superimposed on top of the other in the top canvas. You
can see each layer individually in the two lower canvases.
Despite their not affecting each other, the code is set up so that each layer
_could_ be affected by the other. This will likely be explored more in a future
post.
[viz4]: {% post_url 2021-05-26-viz-4 %}
<script>
const canvasesByClass = {};
[...document.getElementsByTagName("canvas")].forEach((canvasDOM) => {
const canvas = new Canvas(canvasDOM);
canvasDOM.classList.forEach((name) => {
if (!canvasesByClass[name]) canvasesByClass[name] = [];
canvasesByClass[name].push(canvas);
})
});
const universe = new Universe(canvasesByClass, {
"layer1": new Layer(mkNewEl("90%")),
"layer2": new Layer(mkNewEl("50%")),
});
const requestAnimationFrame =
window.requestAnimationFrame ||
window.mozRequestAnimationFrame ||
window.webkitRequestAnimationFrame ||
window.msRequestAnimationFrame;
function doTick() {
universe.update();
universe.draw();
requestAnimationFrame(doTick);
}
doTick();
</script>

View File

@ -1,271 +0,0 @@
---
title: >-
Adventures In DeFi
description: >-
There and Back Again, a Yield Farmer's Tale.
---
It's difficult to be remotely interested in crypto and avoid the world of
decentralized finance (DeFi). Somewhere between the explosion of new projects,
implausible APY percents, complex token schemes, new phrases like "yield
farming" and "impermanent loss", rug pulls, hacks, and astronomical ethereum
fees, you simply _must_ have heard of it, even in passing.
In late November of 2020 I decided to jump in and see what would happen. I read
everything I could find, got as educated as I could, did some (but probably not
enough) math, and got to work. Almost immediately afterwards a giant bull
market hit, fees on ethereum shot up to the moon, and my little yield farming
DeFi ship was effectively out to sea.
For the past 200 days I haven't been able to tweak or withdraw any of the DeFi
positions I made, for fear of incurring so many ethereum fees that any gains I
made would be essentially wiped out. But the bull market is finally at a rest,
fees are down, and I'm interested in what the results of my involuntary
long-term experiment were. Before getting to the results though, let's start at
the beginning. I'm going to walk you through all the steps I took, as well as my
decision making process (as flawed as it surely was) and risk assessments.
## Step 1: The Base Positions
My first step was to set aside some ETH and BTC for this experiment. I was (and
remain) confident that these assets would accrue in value, and so wanted to hold
onto them for a long period of time. But while holding onto those assets, why
not make a little interest on them by putting them to use? That's where DeFi
comes in.
I started with 2.04 ETH and 0.04 BTC. The ETH existed as normal ETH on the
ethereum blockchain, while the 0.04 BTC I had to first convert to [renBTC][ren].
renBTC is an ethereum token whose value is pinned to the value of BTC. This is
accomplished via a decentralized locking mechanism, wherein real BTC is
transferred to a decentralized network of ren nodes, and they lock it such that
no individual node has access to the wallet holding the BTC. At the same time
that the BTC is locked, the nodes print and transfer a corresponding amount of
renBTC to a wallet specified in the BTC transaction. It's a very interesting
project, though the exact locking mechanism used was closed-source at the time I
used it, which concerned me somewhat.
[ren]: https://renproject.io/
### Step 1.5: Collateralization
In Step 2 I deposit my assets into liquidity pools. For my renBTC this was no
problem, but for my ETH it wasn't so simple. I'll explain what a liquidity pool
is in the next section, but for now all that needs to be known is that there are
no worthwhile liquidity pools between ETH and anything ostensibly pinned to ETH
(e.g. WETH). So I needed to first convert my ETH into an asset for which there
are worthwhile liquidity pools, while also not losing my ETH position.
Enter [MakerDAO][makerdao]. MakerDAO runs a decentralized collateralization app,
wherein a user deposits assets into a contract and is granted an amount of DAI
tokens relative to the value of the deposited assets. The value of DAI tokens
are carefully managed via the variable fee structure of the MakerDAO app, such
that 1 DAI is, generally, equal to 1 USD. If the value of the collateralized
assets drops below a certain threshold the position is liquidated, meaning the
user keeps the DAI and MakerDAO keeps the assets. It's not dissimilar to taking
a loan out, using one's house as collateral, except that the collateral is ETH
and not a house.
MakerDAO allows you to choose, within some bounds, how much DAI you withdraw against
your deposited collateral. The more DAI you withdraw, the higher your
liquidation threshold, and if your assets fall in value and hit that threshold
you lose them, so a higher threshold entails more risk. In this way the user has
some say over how risky of a position they want to take out.
In my case I took out a total of 500 DAI on my 2.04 ETH. Even at the time this
was somewhat conservative, but now that the price of ETH has 5x'd it's almost
comical. In any case, I now had 500 DAI to work with, and could move on to the
next step.
[makerdao]: https://makerdao.com/
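To give a rough sense of the numbers involved (the $600 ETH price and the 150%
liquidation ratio below are illustrative assumptions, not MakerDAO's exact
parameters at the time), here's a quick sketch in JavaScript:
```
// hypothetical vault: 2.04 ETH of collateral at an assumed $600/ETH, with
// 500 DAI drawn against it and an assumed 150% liquidation ratio.
const collateralEth = 2.04;
const ethPriceUsd = 600;      // illustrative only
const debtDai = 500;
const liquidationRatio = 1.5; // illustrative only
const collateralizationRatio = (collateralEth * ethPriceUsd) / debtDai;
const liquidationPriceUsd = (debtDai * liquidationRatio) / collateralEth;
console.log(collateralizationRatio); // ~2.45, i.e. ~245% collateralized
console.log(liquidationPriceUsd);    // ETH falling to ~$368 would trigger liquidation
```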
## Step 2: Liquidity Pools
My assets were ready to get put to work, and the work they got put to was in
liquidity pools (LPs). The function of an LP is to facilitate the exchange of
one asset for another between users. They play the same role as a centralized
exchange like Kraken or Binance, but are able to operate on decentralized chains
by using a different exchange mechanism.
I won't go into the details of how LPs work here, as it's not super pertinent.
There's great explainers, like [this one][lp], that are easy to find. Suffice it
to say that each LP operates on a set of assets that it allows users to convert
between, and LP providers can deposit one or more of those assets into the pool
in order to earn fees on each conversion.
When you deposit an asset into an LP you receive back a corresponding amount of
tokens representing your position in that LP. Each LP has its own token, and
each token represents a share of the pool that the provider owns. The value
of each token goes up over time as fees are collected, and so acts as the
mechanism by which the provider ultimately collects their yield.
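As a loose sketch of how that share works (all of the pool numbers here are
made up for illustration):
```
// hypothetical stable pool: $100,000 of assets and 100,000 LP tokens
// outstanding before our deposit.
const poolValue = 100000;
const lpSupply = 100000;
const deposit = 300;
const lpTokens = (deposit / poolValue) * lpSupply; // 300 LP tokens
const share = lpTokens / (lpSupply + lpTokens);    // ~0.3% of the pool
// later, fees have grown the pool by 10%; the same tokens now redeem for
// proportionally more than was put in.
const redeemable = share * (poolValue + deposit) * 1.1; // ~$330
console.log(lpTokens, redeemable);
```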
In addition to the yield one gets from users making conversions via the LP, LP
providers are often also further incentivized by being granted governance tokens
in the LPs they provide for, which they can then turn around and sell directly
or hold onto as an investment. These are usually granted via a staking
mechanism, where the LP provider stakes (or "locks") their LP tokens into the
platform, and is able to withdraw the incentive token based on how long and how
much they've staked.
Some LP projects, such as [Sushi][sushi], have gone further and completely
gamified the whole experience, and are the cause of the multi thousand percent
APYs that DeFi has become somewhat famous for. These projects are flashy, but I
couldn't find myself placing any trust in them.
There is a risk in being an LP provider, and it's called ["impermanent
loss"][il]. This is another area where it's not worth going into super detail,
so I'll just say that impermanent loss occurs when the relative value of the
assets in the pool diverges significantly. For example, if you are a provider in
a BTC/USDC pool, and the value of BTC relative to USD either tanks or
skyrockets, you will have ended up losing money.
I wanted to avoid impermanent loss, and so focused on pools where the assets
have little chance of diverging. These would be pools where the assets are
ostensibly pinned in value, for example a pool between DAI and USDC, or between
renBTC and WBTC. These are called stable pools. By choosing such pools my only
risk was in one of the pooled assets suddenly losing all of its value due to a
flaw in its mechanism, for example if MakerDAO's smart contract were to be
hacked. Unfortunately, stable pools don't have as great yields as their volatile
counterparts, but given that this was all gravy on top of the appreciation of
the underlying ETH and BTC I didn't mind this as much.
I chose the [Curve][curve] project as my LP project of choice. Curve focuses
mainly on stable pools, and provides decent yield percents in that area while
also being a relatively trusted and actively developed project.
I made the following deposits into Curve:
* 200 DAI into the [Y Pool][ypool], receiving back 188 LP tokens.
* 300 DAI into the [USDN Pool][usdnpool], receiving back 299 LP tokens.
* 0.04 renBTC into the [tBTC Pool][tbtcpool], receiving back 0.039 LP tokens.
[lp]: https://finematics.com/liquidity-pools-explained/
[il]: https://finematics.com/impermanent-loss-explained/
[sushi]: https://www.sushi.com/
[curve]: https://curve.fi
[ypool]: https://curve.fi/iearn
[usdnpool]: https://curve.fi/usdn
[tbtcpool]: https://curve.fi/tbtc
## Step 3: Yield Farming
At this point I could have taken the next step of staking my LP tokens into the
Curve platform, and periodically going in and reaping the incentive tokens that
doing so would earn me. I could then sell these tokens and re-invest the profits
back into the LP, and then stake the resulting LP tokens back into Curve,
resulting in a higher yield the next time I reap the incentives, ad nauseam
forever.
This is a fine strategy, but it has two major drawbacks:
* I don't have the time, nor the patience, to implement it.
* ETH transaction fees would make it completely impractical.
Luckily, yield farming platforms exist. Rather than staking your LP tokens
yourself, you instead deposit them into a yield farming platform. The platform
aggregates everyone's LP tokens, stakes them, and automatically collects and
re-invests incentives in large batches. By using a yield farming platform,
small, humble yield farmers like myself can pool our resources together to take
advantage of scale we wouldn't normally have.
Of course, yield farming adds yet another gamification layer to the whole
system, and complicates everything. You'll see what I mean in a moment.
The yield farming platform I chose was [Harvest][harvest]. Overall
Harvest had the best advertised APYs (though those can obviously change on a
dime), a large number of farmed pools that gets updated regularly, as well as a
simple interface that I could sort of understand. The project is a _bit_ of a
mess, and there's probably better options now, but it was what I had at the
time.
For each of the 3 kinds of LP tokens I had collected in Step 2 I deposited them
into the corresponding farming pool on Harvest. As with the LPs, for each
farming pool you deposit into you receive back a corresponding amount of farming
pool tokens which you can then stake back into Harvest. Based on how much you
stake into Harvest you can collect a certain amount of FARM tokens periodically,
which you can then sell, yada yada yada. It's farming all the way down. I didn't
bother much with this.
[harvest]: https://harvest.finance
## Step 4: Wait
At this point the market picked up, ethereum transactions shot up from 20 to 200
gwei, and I was no longer able to play with my DeFi money without incurring huge
losses. So I mostly forgot about it, and only now am coming back to it to see
the damage.
## Step 5: Reap What I've Sown
It's 200 days later, fees are down again, and enough time has passed that I
could plausibly evaluate my strategy. I've gone through the trouble of undoing
all my positions in order to arrive back at my base assets, ETH and BTC. While
it's tempting to just keep the DeFi ship floating on, I think I need to redo it
in a way that I won't be paralyzed during the next market turn, and I'd like to
evaluate other chains besides ethereum.
First, I've unrolled my Harvest positions, collecting the original LP tokens
back plus whatever yield the farming was able to generate. The results of that
step are:
* 194 Y Pool tokens (originally 188).
* 336 USDN Pool tokens (originally 299).
* 0.0405 tBTC Pool tokens (originally 0.039).
Second, I've burned those LP tokens to collect back the original assets from the
LPs, resulting in:
* 215.83 DAI from the Y Pool (originally 200).
* 346.45 DAI from the USDN Pool (originally 300).
* 0.0405 renBTC from the tBTC Pool (originally 0.04).
For a total DAI of 562.28.
Finally, I've re-deposited the DAI back into MakerDAO to reclaim my original
ETH. I had originally withdrawn 500 DAI, but due to interest I now owed 511
DAI. So after reclaiming my full 2.04 ETH I have ~51 DAI left over.
## Insane Profits
Calculating actual APY for the BTC investment is straightforward: it came out to
about 4.20% APY. Not too bad, considering the position is fairly immune to price
movements.
Calculating for ETH is a bit trickier, since I ended up with the same amount of
ETH as I started with (2.04) plus 51 DAI. If I were to purchase ETH with that
DAI now, it would get me a further ~0.02 ETH. Not a whole heck of a lot. And that
doesn't even account for ethereum fees! I made 22 ethereum transactions
throughout this whole process, resulting in ~0.098 ETH spent on transaction
fees.
So in the end, I lost 0.078 ETH, but gained 0.0005 BTC. If I were to
convert the BTC gain to ETH now it would give me a net total profit of:
**-0.071 ETH**
A net loss, how fun!
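For anyone who wants to check that arithmetic, here's a quick sketch of it. The
ETH-per-BTC conversion rate of ~14 is my own rough assumption for the time (it's
consistent with the net total above); everything else comes from the numbers in
this post:

```go
package main

import "fmt"

func main() {
	// Numbers taken from the post; ethPerBTC is my own assumption of the
	// rough price ratio at the time, not something stated above.
	feesETH := 0.098       // ~0.098 ETH spent across 22 transactions
	daiProfitAsETH := 0.02 // ~51 DAI left over, converted to ETH
	btcGain := 0.0005      // renBTC gained via the tBTC pool
	ethPerBTC := 14.0      // assumed conversion rate

	netETH := daiProfitAsETH - feesETH // ≈ -0.078 ETH
	netTotal := netETH + btcGain*ethPerBTC

	fmt.Printf("net ETH before BTC gain: %+.3f\n", netETH)
	fmt.Printf("net ETH including BTC gain: %+.3f\n", netTotal)
}
```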
## Conclusions
There were a lot of takeaways from this experiment:
* ETH fees will get ya, even in the good times. I would need to be working with
at least an order of magnitude higher base position in order for this to work
out in my favor.
* I should have put all my DAI in the Curve USDN pool, and not bothered with the
Y pool. It had almost double the percent return in the end.
* Borrowing DAI on my ETH was fun, but it really cuts down on how much of my ETH
value I'm able to take advantage of. My BTC was able to be fully invested,
whereas at most half of my ETH value was.
* If I have a large USD position I want to sit on, the USDN pool on its own is
not the worst place to park it. The APY on it was about 30%!
I _will_ be trying this again, albeit with a bigger budget and more knowledge. I
want to check out other chains besides ethereum, so as to avoid the fees, as
well as other yield mechanisms besides LPs, and other yield farming platforms
besides Harvest.
Until then!

View File

@ -1,402 +0,0 @@
---
title: >-
Visualization 6
description: >-
Eat your heart out, Conway!
series: viz
tags: tech art
---
<script type="text/javascript">
function randn(n) {
return Math.floor(Math.random() * n);
}
const w = 100;
const h = 50;
class Canvas {
constructor(canvasDOM) {
this.dom = canvasDOM;
this.ctx = canvasDOM.getContext("2d");
// expand canvas element's width to match parent.
this.dom.width = this.dom.parentElement.offsetWidth;
// rectSize must be an even number or the pixels don't display nicely.
this.rectSize = Math.floor(this.dom.width / w /2) * 2;
this.dom.width = w * this.rectSize;
this.dom.height = h * this.rectSize;
}
rectSize() {
return Math.floor(this.dom.width / w);
}
}
class Layer {
constructor(className, newEl, {
maxNewElsPerTick = 10,
ageOfDeath = 60,
neighborBonusScalar = 1,
layerBonusScalar = 1,
chaos = 0,
} = {}) {
this.className = className;
this.els = {};
this.diff = {};
this.newEl = newEl;
this.maxNewElsPerTick = maxNewElsPerTick;
this.ageOfDeath = ageOfDeath;
this.neighborBonusScalar = neighborBonusScalar;
this.layerBonusScalar = layerBonusScalar;
this.chaos = chaos;
}
_normCoord(coord) {
if (typeof coord !== 'string') coord = JSON.stringify(coord);
return coord;
}
get(coord) {
return this.els[this._normCoord(coord)];
}
getAll() {
return Object.values(this.els);
}
set(coord, el) {
this.diff[this._normCoord(coord)] = {action: "set", coord: coord, ...el};
}
unset(coord) {
this.diff[this._normCoord(coord)] = {action: "unset"};
}
applyDiff() {
for (const coordStr in this.diff) {
const el = this.diff[coordStr];
delete this.diff[coordStr];
if (el.action == "set") {
delete el.action;
this.els[coordStr] = el;
} else {
delete this.els[coordStr];
}
}
}
update(state, prevLayer) {
// Apply diff from previous update first. The diff can't be applied last
// because it needs to be present during the draw phase.
this.applyDiff();
const allEls = this.getAll().sort(() => Math.random() - 0.5);
if (allEls.length == 0) {
const nEl = this.newEl([])
nEl.tick = state.tick;
this.set([w/2, h/2], nEl);
return;
}
let newEls = 0;
for (const el of allEls) {
const nCoord = randEmptyNeighboringCoord(this, el.coord);
if (!nCoord) continue; // el has no empty neighboring spots
const nEl = this.newEl(neighboringElsOf(this, nCoord))
nEl.tick = state.tick;
this.set(nCoord, nEl);
newEls++;
if (newEls >= this.maxNewElsPerTick) break;
}
for (const el of allEls) {
const age = state.tick - el.tick;
const neighborBonus = neighboringElsOf(this, el.coord).length * this.neighborBonusScalar;
const layerBonus = prevLayer
? neighboringElsOf(prevLayer, el.coord, true).length * this.layerBonusScalar
: 0;
const chaos = (this.chaos > 0) ? randn(this.chaos) : 0;
if (age - neighborBonus - layerBonus + chaos >= this.ageOfDeath) {
this.unset(el.coord);
}
}
}
draw(canvas) {
for (const coordStr in this.diff) {
const el = this.diff[coordStr];
const coord = JSON.parse(coordStr);
if (el.action == "set") {
canvas.ctx.fillStyle = `hsl(${el.h}, ${el.s}, ${el.l})`;
canvas.ctx.fillRect(
coord[0]*canvas.rectSize, coord[1]*canvas.rectSize,
canvas.rectSize, canvas.rectSize,
);
} else {
canvas.ctx.clearRect(
coord[0]*canvas.rectSize, coord[1]*canvas.rectSize,
canvas.rectSize, canvas.rectSize,
);
}
}
}
}
const neighbors = [
[-1, -1], [0, -1], [1, -1],
[-1, 0], /* [0, 0], */ [1, 0],
[-1, 1], [0, 1], [1, 1],
];
function neighborsOf(coord) {
return neighbors.map((n) => {
let nX = coord[0]+n[0];
let nY = coord[1]+n[1];
nX = (nX + w) % w;
nY = (nY + h) % h;
return [nX, nY];
});
}
function randEmptyNeighboringCoord(layer, coord) {
const neighbors = neighborsOf(coord).sort(() => Math.random() - 0.5);
for (const nCoord of neighbors) {
if (!layer.get(nCoord)) return nCoord;
}
return null;
}
function neighboringElsOf(layer, coord, includeCoord = false) {
const neighboringEls = [];
const neighboringCoords = neighborsOf(coord);
if (includeCoord) neighboringCoords.push(coord);
for (const nCoord of neighboringCoords) {
const el = layer.get(nCoord);
if (el) neighboringEls.push(el);
}
return neighboringEls;
}
const drift = 30;
function mkNewEl(l) {
return (nEls) => {
const s = "100%";
if (nEls.length == 0) {
return {
h: randn(360),
s: s,
l: l,
};
}
// for each h (which can be considered as degrees around a circle) break the h
// down into x and y vectors, and add those up separately. Then find the angle
// between those two resulting vectors, and that's the "average" h value.
let x = 0;
let y = 0;
nEls.forEach((el) => {
const hRad = el.h * Math.PI / 180;
x += Math.cos(hRad);
y += Math.sin(hRad);
});
let h = Math.atan2(y, x);
h = h / Math.PI * 180;
// apply some random drift, normalize
h += (Math.random() * drift * 2) - drift;
h = (h + 360) % 360;
return {
h: h,
s: s,
l: l,
};
}
}
class Universe {
constructor(canvasesByClass, layers) {
this.canvasesByClass = canvasesByClass;
this.state = {
tick: 0,
layers: layers,
};
}
update() {
this.state.tick++;
let prevLayer;
this.state.layers.forEach((layer) => {
layer.update(this.state, prevLayer);
prevLayer = layer;
});
}
draw() {
this.state.layers.forEach((layer) => {
if (!this.canvasesByClass[layer.className]) return;
this.canvasesByClass[layer.className].forEach((canvas) => {
layer.draw(canvas);
});
});
}
}
</script>
<style>
.canvasContainer {
display: grid;
margin-bottom: 2rem;
text-align: center;
}
canvas {
border: 1px dashed #AAA;
width: 100%;
grid-area: 1/1/2/2;
}
</style>
<div class="canvasContainer">
<canvas class="layer1"></canvas>
<canvas class="layer2"></canvas>
</div>
<div class="row">
<div class="columns six">
<h3>Bottom Layer</h3>
<div class="canvasContainer"><canvas class="layer1"></canvas></div>
<div class="layer1 layerParams">
<label>Max New Elements Per Tick</label><input type="text" param="maxNewElsPerTick" />
<label>Age of Death</label><input type="text" param="ageOfDeath" />
<label>Neighbor Bonus Scalar</label><input type="text" param="neighborBonusScalar" />
</div>
</div>
<div class="columns six">
<h3>Top Layer</h3>
<div class="canvasContainer"><canvas class="layer2"></canvas></div>
<div class="layer2 layerParams">
<label>Max New Elements Per Tick</label><input type="text" param="maxNewElsPerTick" />
<label>Age of Death</label><input type="text" param="ageOfDeath" />
<label>Neighbor Bonus Scalar</label><input type="text" param="neighborBonusScalar" />
<label>Layer Bonus Scalar</label><input type="text" param="layerBonusScalar" />
</div>
</div>
</div>
This visualization is essentially the same as the previous, except that each
layer now operates with different parameters than the other, allowing each to
exhibit different behavior.
Additionally, the top layer has been made responsive to the bottom, via a new
mechanism where the age of an element on the top layer can be extended based
on the number of bottom layer elements it neighbors.
Finally, the UI now exposes the actual parameters which are used to tweak the
behavior of each layer. Modifying any parameter will change the behavior of the
associated layer in real-time. The default parameters have been chosen such that
the top layer is now rather dependent on the bottom for sustenance, although it
can venture away to some extent. However, by playing with the parameters
yourself you can find other behaviors and interesting cause-and-effect
relationships that aren't immediately obvious. Try it!
An explanation of the parameters is as follows:
On each tick, up to `maxNewElsPerTick` new elements are created in each layer,
where each new element neighbors an existing one.
Additionally, on each tick, _all_ elements in a layer are iterated through. Each
one's age is determined as follows:
```
age = (currentTick - birthTick)
age -= (numNeighbors * neighborBonusScalar)
age -= (numBottomLayerNeighbors * layerBonusScalar) // only for top layer
```
If an element's age is greater than or equal to the `ageOfDeath` for that layer,
then the element is removed.
<script>
const canvasesByClass = {};
[...document.getElementsByTagName("canvas")].forEach((canvasDOM) => {
const canvas = new Canvas(canvasDOM);
canvasDOM.classList.forEach((name) => {
if (!canvasesByClass[name]) canvasesByClass[name] = [];
canvasesByClass[name].push(canvas);
})
});
const layers = [
new Layer("layer1", mkNewEl("90%"), {
maxNewElsPerTick: 2,
ageOfDeath: 30,
neighborBonusScalar: 50,
}),
new Layer("layer2", mkNewEl("50%", ), {
maxNewElsPerTick: 10,
ageOfDeath: 1,
neighborBonusScalar: 15,
layerBonusScalar: 5,
}),
];
for (const layer of layers) {
document.querySelectorAll(`.${layer.className}.layerParams > input`).forEach((input) => {
const param = input.getAttribute("param");
// pre-fill input values
input.value = layer[param];
input.onchange = () => {
console.log(`setting ${layer.className}.${param} to ${input.value}`);
layer[param] = input.value;
};
});
}
const universe = new Universe(canvasesByClass, layers);
const requestAnimationFrame =
window.requestAnimationFrame ||
window.mozRequestAnimationFrame ||
window.webkitRequestAnimationFrame ||
window.msRequestAnimationFrame;
function doTick() {
universe.update();
universe.draw();
requestAnimationFrame(doTick);
}
doTick();
</script>

View File

@ -1,277 +0,0 @@
---
title: >-
Self-Hosted Email With maddy: A Naive First Attempt
description: >-
How hard could it be?
tags: tech
series: selfhost
---
For a _long_ time now I've wanted to get off gmail and host my own email
domains. I've looked into it a few times, but have been discouraged on multiple
fronts:
* Understanding the protocols underlying email isn't straightforward; it's an
old system, there's a lot of cruft, lots of auxiliary protocols that are now
essentially required, and a lot of different services required to tape it all
together.
* The services which are required are themselves old, and use operational
patterns that maybe used to make sense but are now pretty freaking cumbersome.
For example, postfix requires something like 3 different system accounts.
* Taking the non-standard route and using something like
[Mail-in-a-box][miab] involves running docker, which I'm trying to avoid.
So up till now I had let the idea sit, waiting for something better to come
along.
[maddy][maddy] is, I think, something better. According to the homepage
"\[maddy\] replaces Postfix, Dovecot, OpenDKIM, OpenSPF, OpenDMARC and more with
one daemon with uniform configuration and minimal maintenance cost." Sounds
perfect! The homepage is clean and to the point, it's written in go, and the
docs appear to be reasonably well written. And, to top it all off, it's already
been added to [nixpkgs][nixpkgs]!
So in this post (and subsequent posts) I'll be documenting my journey into
getting a maddy server running to see how well it works out.
## Just Do It
I'm almost 100% sure this won't work, but to start with I'm going to simply get
maddy up and running on my home media server as per the tutorial on its site,
and go from there.
First there's some global system configuration I need to perform. Ideally maddy
could be completely packaged up and not pollute the rest of the system at all,
and if I was using NixOS I think that would be possible, but as it is I need to
create a user for maddy and ensure it's able to read the TLS certificates that I
manage via [LetsEncrypt][le].
```bash
sudo useradd -mrU -s /sbin/nologin -d /var/lib/maddy -c "maddy mail server" maddy
sudo setfacl -R -m u:maddy:rX /etc/letsencrypt/{live,archive}
```
The next step is to set up the nix build of the systemd service file. This is a
strategy I've been using recently to nix-ify my services without needing to deal
with nix profiles. The idea is to encode the nix store path to everything
directly into the systemd service file, and install that file normally. In this
case this looks something like:
```
pkgs.writeTextFile {
name = "mediocregopher-maddy-service";
text = ''
[Unit]
Description=mediocregopher maddy
Documentation=man:maddy(1)
Documentation=man:maddy.conf(5)
Documentation=https://maddy.email
After=network.target
[Service]
Type=notify
NotifyAccess=main
Restart=always
RestartSec=1s
User=maddy
Group=maddy
# cd to state directory to make sure any relative paths
# in config will be relative to it unless handled specially.
WorkingDirectory=/mnt/vol1/maddy
ReadWritePaths=/mnt/vol1/maddy
# ... lots of directives from
# https://github.com/foxcpp/maddy/blob/master/dist/systemd/maddy.service
# that we'll elide here ...
ExecStart=${pkgs.maddy}/bin/maddy -config ${./maddy.conf}
ExecReload=/bin/kill -USR1 $MAINPID
ExecReload=/bin/kill -USR2 $MAINPID
[Install]
WantedBy=multi-user.target
'';
}
```
With the service now testable, it falls on me to actually go through the setup
steps described in the [tutorial][tutorial].
## Following The Tutorial
The first step in the tutorial is setting up the domain names, which I first
perform in cloudflare (where my DNS is hosted) and then reflect into the conf
file. Then I point the `tls file` configuration line at my LetsEncrypt
directory by changing the line to:
```
tls file /etc/letsencrypt/live/$(hostname)/fullchain.pem /etc/letsencrypt/live/$(hostname)/privkey.pem
```
maddy can access these files thanks to the `setfacl` command I performed
earlier.
At this point the server should be effectively configured. However, starting it
via systemd results in this error:
```
failed to load /etc/letsencrypt/live/mx.mydomain.com/fullchain.pem and /etc/letsencrypt/live/mx.mydomain.com/privkey.pem
```
(For my own security I'm not going to be using the actual email domain in this
post, I'll use `mydomain.com` instead.)
This makes sense... I use a wildcard domain with LetsEncrypt, so certs for the
`mx` sub-domain specifically won't exist. I need to figure out how to tell maddy
to use the wildcard, or actually create a separate certificate for the `mx`
sub-domain. I'd rather the former, obviously, as it's far less work.
Luckily, making it use the wildcard isn't too hard, all that is needed is to
change the `tls file` line to:
```
tls file /etc/letsencrypt/live/$(primary_domain)/fullchain.pem /etc/letsencrypt/live/$(primary_domain)/privkey.pem
```
This works because my `primary_domain` domain is set to the top-level
(`mydomain.com`), which is what the wildcard cert is issued for.
At this point maddy is up and running, but there's still a slight problem. maddy
appears to be placing all of its state files in `/var/lib/maddy`, even though
I'd like to place them in `/mnt/vol1/maddy`. I had set the `WorkingDirectory` in
the systemd service file to this, but apparently that's not enough. After
digging through the codebase I discover an undocumented directive which can be
added to the conf file:
```
state_dir /mnt/vol1/maddy
```
Kind of annoying, but at least it works.
The next step is to fiddle with DNS records some more. I add the SPF, DMARC and
DKIM records to cloudflare as described by the tutorial (what do these do? I
have no fuckin clue).
I also need to set up MTA-STS (again, not really knowing what that is). The
tutorial says I need to make a file with certain contents available at the URL
`https://mta-sts.mydomain.com/.well-known/mta-sts.txt`. I love it when a
protocol has to give up and resort to another one in order to keep itself
afloat; it really inspires confidence.
Anyway, I set that subdomain up in cloudflare, and add the following to my nginx
configuration:
```
server {
listen 80;
server_name mta-sts.mydomain.com;
include include/public_whitelist.conf;
location / {
return 404;
}
location /.well-known/mta-sts.txt {
# Check out openresty if you want to get super useful nginx plugins, like
# the echo module, out-of-the-box.
echo 'mode: enforce';
echo 'max_age: 604800';
echo 'mx: mx.mydomain.com';
}
}
```
(Note: my `public_whitelist.conf` only allows cloudflare IPs to access this
sub-domain, which is something I do for all sub-domains which I can put through
cloudflare.)
Finally, I need to create some actual credentials in maddy with which to send my
email. I do this via the `maddyctl` command-line utility:
```
> sudo maddyctl --config maddy.conf creds create 'me@mydomain.com'
Enter password for new user:
> sudo maddyctl --config maddy.conf imap-acct create 'me@mydomain.com'
```
## Send It!
At this point I'm ready to actually test the email sending. I'm going to use
[S-nail][snail] to do so, and after reading through the docs there I put the
following in my `~/.mailrc`:
```
set v15-compat
set mta=smtp://me%40mydomain.com:password@localhost:587 smtp-use-starttls
```
And attempt the following `mailx` command to send an email from my new mail
server:
```
> echo 'Hello! This is a cool email' | mailx -s 'Subject' -r 'Me <me@mydomain.com>' 'test.email@gmail.com'
reproducible_build: TLS certificate does not match: localhost:587
/home/mediocregopher/dead.letter 10/313
reproducible_build: ... message not sent
```
Damn. TLS is failing because I'm connecting over `localhost`, but maddy is
serving the TLS certs for `mydomain.com`. Since I haven't gone through the steps
of exposing maddy publicly yet (which would require port forwarding in my
router, as well as opening a port in iptables), I can't properly test this
except by not requiring TLS. _It's very important that I remember to re-require
TLS before putting anything public._
In the meantime I remove the `smtp-use-starttls` entry from my `~/.mailrc`, and
retry the `mailx` command. This time I get a different error:
```
reproducible_build: SMTP server: 523 5.7.10 TLS is required
```
It turns out there's a further configuration directive I need to add, this time
in `maddy.conf`. Within my `submission` configuration block I add the following
line:
```
insecure_auth true
```
This allows plaintext auth over non-TLS connections. Kind of sketchy, but again
I'll undo this before putting anything public.
Finally, I try the `mailx` command one more time, and it successfully returns!
Unfortunately, no email is ever received in my gmail :( I check the maddy logs
and see what I feared most all along:
```
Jun 29 08:44:58 maddy[127396]: remote: cannot use MX {"domain":"gmail.com","io_op":"dial","msg_id":"5c23d76a-60db30e7","reason":"dial tcp 142.250.152.26:25: connect: connection timed out","remote_addr":"142.250.152.
26:25","remote_server":"alt1.gmail-smtp-in.l.google.com.","smtp_code":450,"smtp_enchcode":"4.4.2","smtp_msg":"Network I/O error"}
```
My ISP is blocking outbound connections on port 25. This is classic email
bullshit; ISPs essentially can't allow outbound SMTP connections, as email is so
easily abusable it would drastically increase the amount of spam being sent from
their networks.
## Lessons Learned
The next attempt will involve an external VPS which allows SMTP, and a lot more
interesting configuration. But for now I'm forced to turn off maddy and let this
dream sit for a little while longer.
[miab]: https://mailinabox.email/
[maddy]: https://maddy.email
[nixpkgs]: https://search.nixos.org/packages?channel=21.05&from=0&size=50&sort=relevance&query=maddy
[tutorial]: https://maddy.email/tutorials/setting-up/
[le]: https://letsencrypt.org/
[snail]: https://wiki.archlinux.org/title/S-nail

View File

@ -1,440 +0,0 @@
---
title: >-
Visualization 7
description: >-
Feedback Loop.
series: viz
tags: tech art
---
<script type="text/javascript">
function randn(n) {
return Math.floor(Math.random() * n);
}
const w = 100;
const h = 60;
class Canvas {
constructor(canvasDOM) {
this.dom = canvasDOM;
this.ctx = canvasDOM.getContext("2d");
// expand canvas element's width to match parent.
this.dom.width = this.dom.parentElement.offsetWidth;
// rectSize must be an even number or the pixels don't display nicely.
this.rectSize = Math.floor(this.dom.width / w /2) * 2;
this.dom.width = w * this.rectSize;
this.dom.height = h * this.rectSize;
}
rectSize() {
return Math.floor(this.dom.width / w);
}
}
class UniverseState {
constructor(layers) {
this.tick = 0;
this.layers = layers;
}
neighboringLayers(layerIndex) {
const prevIndex = layerIndex-1;
const prev = prevIndex < 0 ? null : this.layers[prevIndex];
const nextIndex = layerIndex+1;
const next = nextIndex >= this.layers.length ? null : this.layers[nextIndex];
return [prev, next];
}
}
const defaultKnobs = {
maxNewElsPerTick: 10,
ageOfDeath: 30,
drift: 30,
neighborScalar: 0,
prevLayerScalar: 0,
prevLayerLikenessScalar: 0,
nextLayerScalar: 0,
nextLayerLikenessScalar: 0,
chaos: 0,
};
class Layer {
constructor(className, newEl, knobs = {}) {
this.className = className;
this.els = {};
this.diff = {};
this.newEl = newEl;
this.knobs = { ...defaultKnobs, ...knobs };
}
_normCoord(coord) {
if (typeof coord !== 'string') coord = JSON.stringify(coord);
return coord;
}
get(coord) {
return this.els[this._normCoord(coord)];
}
getAll() {
return Object.values(this.els);
}
set(coord, el) {
this.diff[this._normCoord(coord)] = {action: "set", coord: coord, ...el};
}
unset(coord) {
this.diff[this._normCoord(coord)] = {action: "unset"};
}
applyDiff() {
for (const coordStr in this.diff) {
const el = this.diff[coordStr];
delete this.diff[coordStr];
if (el.action == "set") {
delete el.action;
this.els[coordStr] = el;
} else {
delete this.els[coordStr];
}
}
}
update(state, thisLayerIndex) {
// Apply diff from previous update first. The diff can't be applied last
// because it needs to be present during the draw phase.
this.applyDiff();
const allEls = this.getAll().sort(() => Math.random() - 0.5);
if (allEls.length == 0) {
const newEl = this.newEl(this, [])
newEl.tick = state.tick;
this.set([w/2, h/2], newEl);
return;
}
let newEls = 0;
for (const el of allEls) {
const nCoord = randEmptyNeighboringCoord(this, el.coord);
if (!nCoord) continue; // el has no empty neighboring spots
const newEl = this.newEl(this, neighboringElsOf(this, nCoord))
newEl.tick = state.tick;
this.set(nCoord, newEl);
newEls++;
if (newEls >= this.knobs.maxNewElsPerTick) break;
}
const calcLayerBonus = (el, layer, scalar, likenessScalar) => {
if (!layer) return 0;
const nEls = neighboringElsOf(layer, el.coord, true)
const likeness = nEls.reduce((likeness, nEl) => {
const diff = Math.abs(nEl.c - el.c);
return likeness + Math.max(diff, Math.abs(1 - diff));
}, 0);
return (nEls.length * scalar) + (likeness * likenessScalar);
};
const [prevLayer, nextLayer] = state.neighboringLayers(thisLayerIndex);
for (const el of allEls) {
const age = state.tick - el.tick;
const neighborBonus = neighboringElsOf(this, el.coord).length * this.knobs.neighborScalar;
const prevLayerBonus = calcLayerBonus(el, prevLayer, this.knobs.prevLayerScalar, this.knobs.prevLayerLikenessScalar);
const nextLayerBonus = calcLayerBonus(el, nextLayer, this.knobs.nextLayerScalar, this.knobs.nextLayerLikenessScalar);
const chaos = (this.knobs.chaos > 0) ? randn(this.knobs.chaos) : 0;
if (age - neighborBonus - prevLayerBonus - nextLayerBonus + chaos >= this.knobs.ageOfDeath) {
this.unset(el.coord);
}
}
}
draw(canvas) {
for (const coordStr in this.diff) {
const el = this.diff[coordStr];
const coord = JSON.parse(coordStr);
if (el.action == "set") {
canvas.ctx.fillStyle = `hsl(${el.h}, ${el.s}, ${el.l})`;
canvas.ctx.fillRect(
coord[0]*canvas.rectSize, coord[1]*canvas.rectSize,
canvas.rectSize, canvas.rectSize,
);
} else {
canvas.ctx.clearRect(
coord[0]*canvas.rectSize, coord[1]*canvas.rectSize,
canvas.rectSize, canvas.rectSize,
);
}
}
}
}
const neighbors = [
[-1, -1], [0, -1], [1, -1],
[-1, 0], /* [0, 0], */ [1, 0],
[-1, 1], [0, 1], [1, 1],
];
function neighborsOf(coord) {
return neighbors.map((n) => {
let nX = coord[0]+n[0];
let nY = coord[1]+n[1];
nX = (nX + w) % w;
nY = (nY + h) % h;
return [nX, nY];
});
}
function randEmptyNeighboringCoord(layer, coord) {
const neighbors = neighborsOf(coord).sort(() => Math.random() - 0.5);
for (const nCoord of neighbors) {
if (!layer.get(nCoord)) return nCoord;
}
return null;
}
function neighboringElsOf(layer, coord, includeCoord = false) {
const neighboringEls = [];
const neighboringCoords = neighborsOf(coord);
if (includeCoord) neighboringCoords.push(coord);
for (const nCoord of neighboringCoords) {
const el = layer.get(nCoord);
if (el) neighboringEls.push(el);
}
return neighboringEls;
}
function newEl(h, l) {
return {
h: h,
s: "100%",
l: l,
c: h / 360, // c is used to compare the element to others
};
}
function mkNewEl(l) {
return (layer, nEls) => {
const s = "100%";
if (nEls.length == 0) {
const h = randn(360);
return newEl(h, l);
}
// for each h (which can be considered as degrees around a circle) break the
// h down into x and y vectors, and add those up separately. Then find the
// angle between those two resulting vectors, and that's the "average" h
// value.
let x = 0;
let y = 0;
nEls.forEach((el) => {
const hRad = el.h * Math.PI / 180;
x += Math.cos(hRad);
y += Math.sin(hRad);
});
let h = Math.atan2(y, x);
h = h / Math.PI * 180;
// apply some random drift, normalize
h += (Math.random() * layer.knobs.drift * 2) - layer.knobs.drift;
h = (h + 360) % 360;
return newEl(h, l);
}
}
class Universe {
constructor(canvasesByClass, layers) {
this.canvasesByClass = canvasesByClass;
this.state = new UniverseState(layers);
}
update() {
this.state.tick++;
this.state.layers.forEach((layer, i) => {
layer.update(this.state, i);
});
}
draw() {
this.state.layers.forEach((layer) => {
if (!this.canvasesByClass[layer.className]) return;
this.canvasesByClass[layer.className].forEach((canvas) => {
layer.draw(canvas);
});
});
}
}
</script>
<style>
.canvasContainer {
display: grid;
margin-bottom: 2rem;
text-align: center;
}
canvas {
border: 1px dashed #AAA;
width: 100%;
grid-area: 1/1/2/2;
}
</style>
<div class="canvasContainer">
<canvas class="layer1"></canvas>
<canvas class="layer2"></canvas>
</div>
<div class="row">
<div class="columns six">
<h3>Bottom Layer</h3>
<div class="canvasContainer"><canvas class="layer1"></canvas></div>
<div class="layer1 layerParams">
<label>Max New Elements Per Tick</label><input type="text" param="maxNewElsPerTick" />
<label>Color Drift</label><input type="text" param="drift" />
<label>Age of Death</label><input type="text" param="ageOfDeath" />
<label>Neighbor Scalar</label><input type="text" param="neighborScalar" />
<label>Top Layer Neighbor Scalar</label><input type="text" param="nextLayerScalar" />
<label>Top Layer Neighbor Likeness Scalar</label><input type="text" param="nextLayerLikenessScalar" />
</div>
</div>
<div class="columns six">
<h3>Top Layer</h3>
<div class="canvasContainer"><canvas class="layer2"></canvas></div>
<div class="layer2 layerParams">
<label>Max New Elements Per Tick</label><input type="text" param="maxNewElsPerTick" />
<label>Color Drift</label><input type="text" param="drift" />
<label>Age of Death</label><input type="text" param="ageOfDeath" />
<label>Neighbor Scalar</label><input type="text" param="neighborScalar" />
<label>Bottom Layer Neighbor Scalar</label><input type="text" param="prevLayerScalar" />
<label>Bottom Layer Neighbor Likeness Scalar</label><input type="text" param="prevLayerLikenessScalar" />
</div>
</div>
</div>
Once again, this visualization iterates upon the previous. In the last one the
top layer was able to "see" the bottom, and was therefore able to bolster or
penalize its own elements which were on or near bottom layer elements, but not
vice-versa. This time both layers can see each other, and the "Layer Neighbor
Scalar" can be used to adjust lifetime of elements which are on/near elements of
the neighboring layer.
By default, the bottom layer has a high affinity to the top, and the top layer
has a some (but not as much) affinity in return.
Another addition is the "likeness" scalar. Likeness is defined as the degree to
which one element is like another. In this visualization likeness is determined
by color. The "Layer Neighbor Likeness Scalar" adjusts the lifetime of elements
based on how like they are to nearby elements on the neighboring layer.
By default, the top layer has a high affinity for the bottom's color, but the
bottom doesn't care about the top's color at all (and so its color will drift
aimlessly).
And finally "Color Drift" can be used to adjust the degree to which the color of
new elements can diverge from its parents. This has always been hardcoded, but
can now be adjusted separately across the different layers.
In the default configuration the top layer will (eventually) converge to roughly
match the bottom both in shape and color. When I first implemented the likeness
scaling I thought it was broken, because the top would never converge to the
bottom's color.
What I eventually realized was that the top must have a higher color drift than
the bottom in order for it to do so, otherwise the top would always be playing
catchup. However, if the drift difference is _too_ high then the top layer
becomes chaos and also doesn't really follow the color of the bottom. A
difference of 10 (degrees out of 360) is seemingly enough.
<script>
const canvasesByClass = {};
[...document.getElementsByTagName("canvas")].forEach((canvasDOM) => {
const canvas = new Canvas(canvasDOM);
canvasDOM.classList.forEach((name) => {
if (!canvasesByClass[name]) canvasesByClass[name] = [];
canvasesByClass[name].push(canvas);
})
});
const layers = [
new Layer("layer1", mkNewEl("90%"), {
maxNewElsPerTick: 2,
ageOfDeath: 30,
drift: 40,
neighborScalar: 50,
nextLayerScalar: 20,
}),
new Layer("layer2", mkNewEl("50%", ), {
maxNewElsPerTick: 15,
ageOfDeath: 1,
drift: 50,
neighborScalar: 5,
prevLayerScalar: 5,
prevLayerLikenessScalar: 20,
}),
];
for (const layer of layers) {
document.querySelectorAll(`.${layer.className}.layerParams > input`).forEach((input) => {
const param = input.getAttribute("param");
// pre-fill input values
input.value = layer.knobs[param];
input.onchange = () => {
console.log(`setting ${layer.className}.${param} to ${input.value}`);
layer.knobs[param] = input.value;
};
});
}
const universe = new Universe(canvasesByClass, layers);
const requestAnimationFrame =
window.requestAnimationFrame ||
window.mozRequestAnimationFrame ||
window.webkitRequestAnimationFrame ||
window.msRequestAnimationFrame;
function doTick() {
universe.update();
universe.draw();
requestAnimationFrame(doTick);
}
doTick();
</script>

View File

@ -1,115 +0,0 @@
---
title: >-
Setting Up maddy On A VPS
description: >-
We have delivery!
tags: tech
series: selfhost
---
In the previous post I left off with being blocked by my ISP from sending
outbound emails on port 25, effectively forcing me to set up [maddy][maddy] on a
virtual private server (VPS) somewhere else.
After some research I chose [Vultr][vultr] as my VPS provider. They apparently
don't block you from sending outbound emails on port 25, and are in general
pretty cheap. I rented their smallest VPS for $5/month, plus an additional
$3/month to reserve an IPv4 address (though I'm not sure I really need that; I
have dDNS set up at home and could easily get that working here as well).
## TLS
The first major hurdle was getting TLS certs for `mydomain.com` (not the real
domain) onto my Vultr box. For the time being I've opted to effectively
copy-paste my local [LetsEncrypt][le] setup to Vultr, using certbot to
periodically update my records using DNS TXT challenges.
The downside to this is that I now require my Cloudflare API key to be present
on the Vultr box, which effectively means that if the box ever gets owned
someone will have full access to all my DNS. For now I've locked down the box as
best as I can, and will look into changing the setup in the future. There are
two ways I could go about it:
* SCP the certs from my local box to the remote every time they're renewed. This
would require setting up a new user on the remote box with very narrow
privileges. This isn't the worst thing though.
* Use a different challenge method than DNS TXT records.
But again, I'm trying to set up maddy, not LetsEncrypt, and so I needed to move
on.
## Deployment
In the previous post I talked about how I'm using nix to generate a systemd
service file which encompasses all dependencies automatically, without needing
to install anything to the global system or my nix profile.
Since that's already been set up, it's fairly trivial to use `nix-copy-closure`
to copy a service file, and _all_ of its dependencies (including configuration)
from my local box to the remote Vultr box. Simply:
```
nix-copy-closure -s <ssh host> <nix store path>
```
I whipped up some scripts around this so that I can run a single make target and
have it build the service (and all deps), do a `nix-copy-closure` to the remote
host, copy the service file into `/etc/systemd/system`, and restart the
service.
## Changes
For the most part the maddy deployment on the remote box is the same as on the
local one. Down the road I will likely change them both significantly, so that
the remote one only deals with SMTP (no need for IMAP) and the local one will
automatically forward all submitted messages to it.
Once that's done, and the remote Vultr box is set up on my [nebula][nebula]
network, there won't be a need for the remote maddy to do any SMTP
authentication, since the submission endpoint can be made entirely private.
For now, however, I've set up maddy on the remote box's public interface with
SMTP authentication enabled, to make testing easier.
## Testing
And now, to test it! I changed the SMTP credentials in my `~/.mailrc` file as
appropriate, and let a test email rip:
```
echo 'Hello! This is a cool email' | mailx -s 'Subject' -r 'Me <me@mydomain.com>' 'test.email@gmail.com'
```
This would, ideally, send an email from my SMTP server (on my domain) to a test
gmail address. Unfortunately, it did not do that, but instead maddy spit this out
in its log:
> maddy[1547]: queue: delivery attempt failed {"msg_id":"330a1ed9","rcpt":"mediocregopher@gmail.com","reason":"[2001:19f0:5001:355a:5400:3ff:fe73:3d02] Our system has detected that\nthis message does not meet IPv6 sending guidelines regarding PTR\nrecords and authentication. Please review\n https://support.google.com/mail/?p=IPv6AuthError for more information\n. gn42si18496961ejc.717 - gsmtp","remote_server":"gmail-smtp-in.l.google.com.","smtp_code":550,"smtp_enchcode":"5.7.1","smtp_msg":"gmail-smtp-in.l.google.com. said: [2001:19f0:5001:355a:5400:3ff:fe73:3d02] Our system has detected that\nthis message does not meet IPv6 sending guidelines regarding PTR\nrecords and authentication. Please review\n https://support.google.com/mail/?p=IPv6AuthError for more information\n. gn42si18496961ejc.717 - gsmtp"}
Luckily Vultr makes setting up PTR records for reverse DNS fairly easy. They
even allowed me to do it on my box's IPv6 address which I'm not paying to
reserve (though I'm not sure what the long-term risks of that are... can it
change?).
Once done, I attempted to send my email again, and what do you know...
![Success!](/assets/maddy-vps/success.png)
Success!
So now I can send emails. There are a few next steps from here:
* Get the VPS on my nebula network and lock it down properly.
* Fix the TLS cert situation.
* Set up the remote maddy to forward submissions to my local maddy.
* Use my sick new email!
[maddy]: https://maddy.email
[le]: https://letsencrypt.org/
[vultr]: https://www.vultr.com/
[nebula]: https://github.com/slackhq/nebula

View File

@ -1,315 +0,0 @@
---
title: >-
How to Secure a Webapp
description: >-
Get ready to jump through some hoops.
tags: tech
---
In this post I will be documenting all security hoops that one must jump through
in order to consider their webapp secure. This list should not be considered
comprehensive, as I might have forgotten something or some new hoop might have
appeared since writing.
For the context of this post a "webapp" will be considered to be an HTML/CSS/JS
website, loaded in a browser, with which users create and access accounts using
some set of credentials (probably username and password). In other words, most
popular websites today. This post will only cover those concerns which apply to
_all_ webapps of this nature, and so won't dive into any which might be incurred
by using one particular technology or another.
Some of these hoops might seem redundant or optional. That may be the case. But
if you are building a website and are beholden to passing some third-party
security audit for any reason you'll likely find yourself being forced to
implement most, if not all, of these measures anyway.
So without further ado, let's get started!
## HTTPS
At this point you have to use HTTPS; there's no excuse for not doing so. All
attempts to hit an HTTP endpoint should redirect to the equivalent HTTPS
endpoint, and you should be using [HSTS][hsts] to ensure that a browser is never
tricked into falling back to HTTP via some compromised DNS server.
[hsts]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Strict-Transport-Security
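As a rough sketch of what that looks like with Go's standard library (the
max-age value and cert paths are just placeholders, pick your own):

```go
package main

import "net/http"

func main() {
	// Plain-HTTP listener whose only job is to bounce everything to HTTPS.
	go http.ListenAndServe(":80", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		http.Redirect(w, r, "https://"+r.Host+r.RequestURI, http.StatusMovedPermanently)
	}))

	mux := http.NewServeMux()
	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		// HSTS: tell the browser to never fall back to plain HTTP for this host.
		w.Header().Set("Strict-Transport-Security", "max-age=63072000; includeSubDomains")
		w.Write([]byte("hello over TLS\n"))
	})

	// cert.pem/key.pem stand in for whatever certificates you actually use.
	http.ListenAndServeTLS(":443", "cert.pem", "key.pem", mux)
}
```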
## Cookies
Cookies are an old web technology, and have always been essentially broken. Each
cookie can have certain flags set on it which change its behavior, and some of
these flags are required at this point.
### Secure
If you're storing anything sensitive in a cookie (spoiler alert: you will be)
then you need to have the Secure flag set on it. This prevents the cookie from
being sent in a non-HTTPS request.
### HTTPOnly
The HTTPOnly flag protects a cookie from XSS attacks by preventing it from being
accessible from javascript. Any cookie which is storing sensitive information
_must_ have this flag set. In the **Authentication** section we will cover the
storage of session tokens, but the TLDR is that they have to be stored in an
HTTPOnly cookie.
Practically, this means that your session architecture _must_ account for the
fact that the webapp itself will not have direct access to its persistent
session token(s), and therefore must have some other way of knowing that it's
logged in (e.g. a secondary, non-HTTPOnly cookie which contains no secrets but
only signals that the browser is logged in).
### SameSite
The SameSite attribute can be set to `Strict`, `Lax`, or `None`. `Lax` is the
default in modern browsers and is sufficient for most security concerns, but if
you can go with `Strict` that would be better. The downside of `Strict` is that
cookies won't be sent on the initial page-load of your site when the user is
navigating to it from somewhere else.
In any case, even though `Lax` is the default you should still set this
attribute manually (or your auditor might get to add another bullet point to
their report).
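Putting all three flags together in Go might look something like this (the
cookie name and contents are placeholders):

```go
package main

import "net/http"

// setSessionCookie stores a session token with the Secure, HTTPOnly, and
// SameSite flags all set.
func setSessionCookie(w http.ResponseWriter, token string) {
	http.SetCookie(w, &http.Cookie{
		Name:     "session",
		Value:    token,
		Path:     "/",
		Secure:   true,                    // never sent over plain HTTP
		HttpOnly: true,                    // not readable from javascript
		SameSite: http.SameSiteStrictMode, // or http.SameSiteLaxMode
	})
}

func main() {
	http.HandleFunc("/login", func(w http.ResponseWriter, r *http.Request) {
		// ... authenticate the user, then:
		setSessionCookie(w, "some-opaque-session-token")
		w.Write([]byte("logged in\n"))
	})
	http.ListenAndServe(":8080", nil)
}
```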
## Authentication
Authentication is obviously one of the juiciest targets for an attacker. It's
one thing to be able to trick a user into performing this or that action, but if
one can just log in _as_ the user then they essentially have free rein over all
their information.
### Password History
Most websites use a username/password system as the first step of login. This
is.... fine. We've accepted it, at any rate. But there's a couple of hoops which
must be jumped through as a result of it, and the first is password history.
I hope it goes without saying that one should be using a hashing algorithm like
bcrypt to store user passwords. But what is often not said is that, for each
user, you need to store the hashes of their last N passwords (where N is
something like 8). This way if they attempt to re-use an old password they are
not able to do so. The users must be protected from themselves, after all.
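A sketch of what the check itself can look like, using bcrypt; how the previous
hashes are actually stored and fetched is up to you, the slice here is just for
illustration:

```go
package main

import (
	"errors"
	"fmt"

	"golang.org/x/crypto/bcrypt"
)

var errPasswordReused = errors.New("password was used previously")

// checkPasswordHistory returns an error if newPassword matches any of the
// user's previous bcrypt password hashes.
func checkPasswordHistory(previousHashes [][]byte, newPassword string) error {
	for _, hash := range previousHashes {
		if bcrypt.CompareHashAndPassword(hash, []byte(newPassword)) == nil {
			return errPasswordReused
		}
	}
	return nil
}

func main() {
	oldHash, _ := bcrypt.GenerateFromPassword([]byte("hunter2"), bcrypt.DefaultCost)
	history := [][]byte{oldHash}

	fmt.Println(checkPasswordHistory(history, "hunter2")) // password was used previously
	fmt.Println(checkPasswordHistory(history, "hunter3")) // <nil>
}
```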
### Credential Stuffing/Account Enumeration
A credential stuffing attack is one where credentials are stolen from one
website and then attempted to be used on another, in the hope that users have
re-used their username/password across multiple sites. When one occurs it'll
often look like a botnet spamming the authentication endpoint with tons of
different credentials.
Account enumeration is a similar attack: it's where an attacker finds a way to
get the webapp to tell them whether or not an account email/username exists in
the system, without needing to have the right password. This is often done by
analyzing the error messages returned from login or a similar endpoint (e.g.
"Sorry this username is taken"). They then run through all possible values for
that endpoint to try and enumerate which users actually exist in the system.
Account enumeration is tricky because often those errors are extremely helpful,
and we'd _like_ to keep them if we can.
I've bucketed both of these attacks in the same section because they have a
similar solution: proof-of-work. The idea is that, for each request to some
sensitive endpoint, the client must send some proof that they've done a
CPU-intensive computation.
Compared to IP-based rate-limiting, PoW is much more effective against botnets
(which have a limitless set of IPs from which to spam you), while also being
much less intrusive on your real users than a captcha.
PoW stymies botnets because they are generally being hosted by low-power,
compromised machines. In addition the systems that run these botnets are pretty
shallow in capability, because it's more lucrative to rent the botnet out than
to actually use it yourself, so it's rare for a botnet operator to go to the
trouble of implementing your PoW algorithm in the first place.
So stick a PoW requirement on any login or account creation endpoint, or any
other endpoint which might be used to enumerate accounts in the system. You can
even make the PoW difficulty rise in relation to the number of recent attempts on
these endpoints, if you're feeling spry.
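To make that concrete, here's a minimal sketch of the server-side check; the
challenge format, the difficulty, and how the challenge gets handed to the
client are all invented for the example:

```go
package main

import (
	"crypto/sha256"
	"encoding/binary"
	"fmt"
	"math/bits"
)

// powOK reports whether sha256(challenge || nonce) has at least `difficulty`
// leading zero bits. The client has to brute-force a nonce which satisfies
// this before the server will process its request.
func powOK(challenge []byte, nonce uint64, difficulty int) bool {
	buf := make([]byte, len(challenge)+8)
	copy(buf, challenge)
	binary.BigEndian.PutUint64(buf[len(challenge):], nonce)
	sum := sha256.Sum256(buf)

	leading := 0
	for _, b := range sum {
		if b == 0 {
			leading += 8
			continue
		}
		leading += bits.LeadingZeros8(b)
		break
	}
	return leading >= difficulty
}

func main() {
	challenge := []byte("per-request-random-challenge")

	// This is the client's side of the bargain: grind nonces until one passes.
	var nonce uint64
	for !powOK(challenge, nonce, 20) {
		nonce++
	}
	fmt.Println("found nonce:", nonce)
}
```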
### MFA
All the PoW checks in the world won't help your poor user who isn't using a
different username/password for each website, and who got unlucky enough to have
those credentials leaked in a hack of a completely separate site from your own.
They also won't help your user if they _are_ using different username/passwords
for everything, but their machine gets straight up stolen IRL and the attacker
gets access to their credential storage.
What _will_ help them in these cases, however, is if your site supports
multi-factor authentication, such as [TOTP][totp]. If it does then your user
will have a further line of defense in the form of another password which
changes every 30 seconds, and which can only be accessed from a secondary device
(like their phone). If your site claims to care about the security of your
user's account then MFA is an absolute requirement.
It should be noted, however, that not all MFA is created equal. A TOTP system
is great, but a one-time code being sent over SMS or email is totally different
and not nearly as great. SMS is vulnerable to [SIM jacking][sim], which can be
easily used in a targeted attack against one of your users. One-time codes over
email are pointless for MFA, as most people have their email logged in on their
machine all the time, so if someone steals your user's machine they're still
screwed.
In summary: MFA is essentially required, _especially_ if the user's account is
linked to anything valuable, and must be done with real MFA systems like TOTP,
not SMS or email.
[totp]: https://www.twilio.com/docs/glossary/totp
[sim]: https://www.vice.com/en/article/3kx4ej/sim-jacking-mobile-phone-fraud
### Login Notifications
Whenever a user successfully logs into their account you should send them an email
(or some other notification) letting them know it happened. This way if it
wasn't actually them who did so, but an attacker, they can perhaps act quickly
to lock down their account and prevent any further harm. The login notification
email should have some kind of link in it which can be used to immediately lock
the account.
### Token Storage
Once your user has logged into your webapp, it's up to you, the developer, to
store their session token(s) somewhere. The question is... where? Well this
one's easy, because there's only one right answer: HTTPOnly cookies (as alluded
to earlier).
When storing session tokens you want to guard against XSS attacks which might
grab the tokens and send them to an attacker, allowing that attacker to hijack
the session and pose as the user. This means the following are not suitable
places to store the tokens:
* Local storage.
* `window`, or anything which can be accessed via `window`.
* Non-HTTPOnly cookies.
Any of these are trivially accessible to a script running in the browser. If a
session token is ephemeral then it may be stored in a "normal" javascript
variable somewhere _as long as_ that variable isn't accessible from a global
context. But for any tokens which need to be persisted across browser restarts
an HTTPOnly cookie is your only option.
## Cross-Site
Speaking of XSS attacks, we have some more mitigation coming up...
### CSP
Setting a [CSP][csp] for your website is key to preventing XSS. A CSP allows you
to more tightly control the allowed origins of the various entities on your site
(be they scripts, styles, images, etc...). If an entity of unexpected origin
shows up it is disallowed.
Be sure to avoid any usages of the policies labeled "unsafe" (go figure),
otherwise the CSP is rendered somewhat pointless. Also, when using hostname
based allowlisting try to be as narrow as you can in your allowlist, and
especially only include https hosts. If you can you should opt for the `nonce`
or `sha` policies.
[csp]: https://developer.mozilla.org/en-US/docs/Web/HTTP/CSP
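For example, a per-request nonce policy in Go could look roughly like this (the
exact policy string is just an example, tune it to your own assets):

```go
package main

import (
	"crypto/rand"
	"encoding/base64"
	"fmt"
	"net/http"
)

func handler(w http.ResponseWriter, r *http.Request) {
	// Fresh nonce per response; only scripts carrying it will execute.
	raw := make([]byte, 16)
	if _, err := rand.Read(raw); err != nil {
		http.Error(w, "internal error", http.StatusInternalServerError)
		return
	}
	nonce := base64.StdEncoding.EncodeToString(raw)

	w.Header().Set("Content-Security-Policy",
		fmt.Sprintf("default-src 'self'; script-src 'nonce-%s'; object-src 'none'; base-uri 'none'", nonce))

	fmt.Fprintf(w, `<html><body><script nonce=%q>console.log("allowed")</script></body></html>`, nonce)
}

func main() {
	http.HandleFunc("/", handler)
	http.ListenAndServe(":8080", nil)
}
```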
### SVG
A small but important note: if your website allows users to upload images, then
be _very_ careful about allowing users to upload SVGs. SVGs are actually XML
documents, and even worse than that, they allow `<script>` tags within them! If
you can get away with it, it's better to disallow them entirely.
## CSRF
The web was designed in a time when cross-site requests were considered a
feature. This has proven to be a massive mistake. We have two cross-site request
prevention techniques in this list. The first is CSRF protection.
CSRF protection covers you against a variety of attacks, mostly of the kind
where an attacker embeds a `<form>` on their own webpage, with the form set up
to POST to _your_ website in some way. When a user of your website lands on the
attacker's page and triggers the POST, the POST will be sent along with whatever
cookies the user has stored in their browser for _your_ site!
The attacker could, potentially, trick a user into submitting a password-reset
request using a known value, or withdrawing all their money into the attacker's
bank account, or anything else the user might be able to do on their own.
The idea with CSRF is that any HTTP request made against an API should have an
unguessable token as a required parameter, called the CSRF token. The CSRF token
should be given to your webapp in a way where only your webapp could know it.
There are many ways to accomplish this, including a cookie, server-side embedded
value, etc... OWASP has put together an [entire cheatsheet full of CSRF
methods][csrf] which is well worth checking out.
[csrf]: https://cheatsheetseries.owasp.org/cheatsheets/Cross-Site_Request_Forgery_Prevention_Cheat_Sheet.html
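One of the simpler approaches from that cheatsheet is the double-submit cookie:
the server sets a random token in a cookie, the webapp echoes it back in a
header, and the two must match. A sketch of the server-side comparison (the
cookie and header names here are arbitrary):

```go
package main

import (
	"crypto/subtle"
	"net/http"
)

// requireCSRF rejects any state-changing request whose X-CSRF-Token header
// doesn't match the csrf_token cookie (which an attacker's cross-site form
// can neither read nor set).
func requireCSRF(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodGet && r.Method != http.MethodHead {
			cookie, err := r.Cookie("csrf_token")
			header := r.Header.Get("X-CSRF-Token")
			if err != nil || header == "" ||
				subtle.ConstantTimeCompare([]byte(cookie.Value), []byte(header)) != 1 {
				http.Error(w, "invalid CSRF token", http.StatusForbidden)
				return
			}
		}
		next.ServeHTTP(w, r)
	})
}

func main() {
	mux := http.NewServeMux()
	mux.HandleFunc("/transfer", func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte("ok\n"))
	})
	http.ListenAndServe(":8080", requireCSRF(mux))
}
```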
## CORS
CORS is the other half of cross-site protection. With CSRF in place it's
somewhat redundant, but it's good to have multiple layers of protection in place
(in case you fuck up one of them by accident).
The key thing one must do for CORS protection is to set the
`Access-Control-Allow-Origin` header to the origin a request is being sent from _only
if you trust that origin_. If you stick a wildcard in that header then you're
not doing anything.
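In Go that check might look something like this (the allowlist is obviously a
placeholder):

```go
package main

import "net/http"

// allowedOrigins is a placeholder allowlist of origins trusted to make
// cross-site requests against the API.
var allowedOrigins = map[string]bool{
	"https://app.mydomain.com": true,
}

func cors(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		origin := r.Header.Get("Origin")
		if allowedOrigins[origin] {
			// Echo back only origins we trust; never "*".
			w.Header().Set("Access-Control-Allow-Origin", origin)
			w.Header().Set("Vary", "Origin")
		}
		if r.Method == http.MethodOptions {
			// Preflight request: answer it without hitting the real handler.
			w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
			w.Header().Set("Access-Control-Allow-Headers", "Content-Type, X-CSRF-Token")
			w.WriteHeader(http.StatusNoContent)
			return
		}
		next.ServeHTTP(w, r)
	})
}

func main() {
	http.ListenAndServe(":8080", cors(http.DefaultServeMux))
}
```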
## Random Headers
The rest of this is random HTTP headers which must be set in various contexts to
protect your users.
### Permissions Policy
The [Permissions-Policy][pp] header is fairly new and not fully standardized
yet, but there is support for it so it's worth using. It allows you to specify
exactly which browser features you expect your webapp to need, and therefore
prevent an attacker from taking advantage of some other feature that you were
never going to use anyway.
[pp]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Feature-Policy
### X-Content-Type-Options
It's important to set `X-Content-Type-Options: nosniff` on virtually all
HTTP responses, in order to (theoretically) prevent a browser from inferring the
MIME type of the returned content.
### X-Frame-Options
Set `X-Frame-Options: deny` to prevent your webapp from being rendered in a
frame or iframe on someone else's site, which might then be used to trick one of
your users into doing something stupid.
### X-XSS-Protection
Set `X-XSS-Protection: 1; mode=block` to give older browsers which lack CSP
support some extra defense against XSS attacks. It's not super clear to me what
exactly this actually does, but it's easy enough to set.
### Referrer-Policy
Set the `Referrer-Policy` to inform your users' browsers to not send the
`Referer` header to third-party sites when your users navigate away from your
site. You don't want other websites to be able to see _yours_ in their logs, as
they could then correlate which users of theirs have accounts with you (and so
potentially have some easy targets).
### Cache-Control/Pragma
For all requests which return sensitive information (i.e. any authenticated
requests) it's important to set `Cache-Control: no-store` and `Pragma: no-cache`
on the response. This prevents some middle server or the browser from caching
the response, and potentially returning it later to someone else using your site
from the same location.
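All of the headers in this section are easy enough to tack on in a single
middleware; here's a sketch (the Permissions-Policy value is just an example,
and the caching headers assume the wrapped handlers return sensitive,
authenticated content):

```go
package main

import "net/http"

// secureHeaders sets the grab-bag of headers described above on every response.
func secureHeaders(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		h := w.Header()
		h.Set("X-Content-Type-Options", "nosniff")
		h.Set("X-Frame-Options", "deny")
		h.Set("X-XSS-Protection", "1; mode=block")
		h.Set("Referrer-Policy", "no-referrer")
		h.Set("Permissions-Policy", "camera=(), microphone=(), geolocation=()")
		h.Set("Cache-Control", "no-store")
		h.Set("Pragma", "no-cache")
		next.ServeHTTP(w, r)
	})
}

func main() {
	http.ListenAndServe(":8080", secureHeaders(http.DefaultServeMux))
}
```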
## That's It
It's probably not it, actually; these are just what I could think of off the top
of my head. Please email me if I missed any.
If you, like me, find yourself asking "how is anyone supposed to have figured
this out?" then you should A) thank me for writing it all down for you and B)
realize that at least 50% of this list has nothing to do with the web, really,
and everything to do with covering up holes that backwards compatibility has
left open. We can cover these holes; we just need everyone to agree on the path
to doing so, and to allow ourselves to leave some ancient users behind.

View File

@ -1,248 +0,0 @@
---
title: >-
V4 of Radix, a Golang Redis Driver
description: >-
What's new, what's improved, and where we're going from here.
tags: tech
---
Radix is a Go driver for the [Redis][redis] database. The current stable release
is v3, the docs for which can be found [here][v3]. Over the past year
(perhaps longer) I've been working on a new version, v4, with the aim of
addressing some of the shortcomings of v3 and distilling the API a bit better.
At this point v4 is in beta. While there are still some internal bugs and QoL
improvements which need to be made, the API is roughly stable and I wouldn't
discourage anyone from using it for a non-critical project. In the coming months
I intend on finishing the polish and tagging a `v4.0.0` release, but in the
meantime let's go over the major changes and improvements in v4!
You can see the v4 documentation [here][v4], if you'd like to follow along with
any of the particulars, and you can see the full CHANGELOG [here][changelog].
## Shoutouts
Before continuing I want to give a huge shoutout to
[nussjustin][nussjustin]. Since before v3 was even stable Justin has been
contributing to radix in every way possible, from running benchmarks and making
very low-level performance improvements to building whole user-facing features
and responding to github issues when I get lost in the woods. Thank you Justin!
## RESP3
Starting at the lowest level, v4 supports redis's new wire protocol,
[RESP3][resp3]. This new protocol is (mostly) backwards compatible with the
previous wire protocol, and is really more an extension than anything. The [new
resp3 sub-package][resp3pkg] is capable of marshaling and unmarshaling all new
wire types, including the streamed aggregates and streamed strings.
A major improvement made on the API level is the addition of the
[resp.Opts][respOpts] type, which is used to propagate things like byte buffers
and buffered readers. Doing this allows the resp3 package to reduce memory
allocations without relying on something like `sync.Pool`, which introduces
locking overhead.
There's still some question to be answered regarding the best way for the main
radix package to deal with the new push and attribute types, but the resp3
package is general-purpose enough to handle most strategies in the future.
In fact, the RESP3 protocol as a whole (and therefore v4's associated resp3
sub-package) is totally usable outside of redis. If you're looking for a
human-readable, binary safe, fast, and simple wire protocol which already has
great tooling and libraries across multiple programming languages, I highly
recommend checking out RESP3.
## Conn
Arguably one of the biggest design warts of v3, in my eyes, is the
[CmdAction][cmdaction] type. This type was required to allow for pipelining, which
is a feature of redis where you can write new commands to a redis connection
prior to previous ones returning their results. The major upside of pipelining
is that N pipelined commands will only result in 2 system calls (a network write
then a network read), rather than 2N system calls (N writes and N reads) if each
command was performed independently.
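To illustrate the syscall savings without any radix machinery at all, here's a
sketch using just the standard library and redis's inline command syntax (it
assumes a redis listening on localhost:6379):

```go
package main

import (
	"bufio"
	"fmt"
	"log"
	"net"
)

func main() {
	conn, err := net.Dial("tcp", "localhost:6379")
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	const n = 100
	bw := bufio.NewWriter(conn)
	br := bufio.NewReader(conn)

	// Write all N commands into one buffer, then flush: roughly one write
	// syscall instead of N. (Inline commands keep the demo short.)
	for i := 0; i < n; i++ {
		fmt.Fprintf(bw, "PING\r\n")
	}
	if err := bw.Flush(); err != nil {
		log.Fatal(err)
	}

	// Read all N replies back off the single buffered reader.
	for i := 0; i < n; i++ {
		if _, err := br.ReadString('\n'); err != nil { // each reply is "+PONG\r\n"
			log.Fatal(err)
		}
	}
	fmt.Println("pipelined", n, "commands")
}
```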
The normal v3 Action type is fairly opaque, and would perform both the write and
read internally without exposing any way to do some other action in between
(such as performing writes/reads for other commands in a pipeline). CmdAction
extends Action to allow the write and read to be performed independently, and
then leaves it to the Pipeline type to deal with the batching.
v4 gets rid of the need for CmdAction, while allowing even more Action types to
be pipeline-able than before (e.g. [EvalScript][evalscript]). This was done by
coalescing the Encode and Decode methods on the [Conn][conn] type into a single
method: EncodeDecode. By doing this we allow Actions to perform the write/read
steps in a way which groups the two together, but leaves it to Conn to actually
perform the steps in its own way.
Because Conn now has knowledge of which read/write steps go together, it's
possible to perform pipelining in nearly all cases. Aside from using the
Pipeline type manually, the v4 Conn is able to automatically pipeline most
Actions when they are performed concurrently on the same Conn. v3 had a similar
feature, called "implicit pipelining", but v4 rebrands the feature as
"connection sharing" since the mechanism is slightly different and the
applicability is broader.
Despite the apparent simplicity of the change (combining Encode and Decode
methods), this resulted in probably the largest code difference between v3 and
v4, involving the most complex new logic and package-wide refactorings. But the
end result is a simpler, smaller API which can be applied to more use-cases. A
great win!
## Pool
In v3 the connection pool, the Pool type, was implemented with the assumption
that each Action (or CmdAction) would borrow a Conn for the duration of the
Action. As such the Pool expects to be creating and destroying connections as
load increases and decreases; if the number of concurrent commands goes up then
the number of connections required to handle them goes up as well, and vice-versa.
Down the road the Pool became responsible for performing implicit pipelining as
well. This allowed for grouping together many commands on the same connection,
reducing pressure on connection creation greatly, but nevertheless the Pool kept
that same general pattern of dynamic connection pool sizing.
In v4 there is no longer the assumption that each command gets its own
connection, and in fact that assumption is flipped: each connection is expected
to handle multiple commands concurrently in almost all cases. This means the
Pool can get rid of the dynamism, and opt instead for a simple static connection
pool size. There is still room in the API for some dynamic connection sizing to
be implemented later, but it's mostly unnecessary now.
Some care should be used with commands which _can't_ be pipelined, for example
blocking commands like BRPOPLPUSH and XREAD. These commands, ideally, should be
performed on an individual Conn created just for that purpose. Pool _will_
properly handle them if needed, but with the caveat that the Action will
essentially remove a Conn from the Pool for its duration.
[The new Pool][pool] is _vastly_ simpler in implementation than the old, as most
of the complexity has been moved into Conn. Really this whole section is an
extension of the refactoring which was started by the changes to Conn.
## MultiClient
In v3 there was a single Client type which was used to encompass Conn, Pool,
Sentinel, and Cluster, with the aim that users could just use Client in their
code and easily swap out the underlying implementation as needed.
In practice this didn't work out. The original Client type only had a Do method
for performing Actions, which would always perform the Actions against the
primary instance in the case of Cluster and Sentinel. Cluster and Sentinel ended
up being extended with DoSecondary methods, and Cluster required its own
constructor for Scanner, so if you used any of those features you would not be
able to use Client.
v4 improves this situation by introducing the [MultiClient][multiclient]
interface, which is implemented by both Cluster and Sentinel, while Conn and
Pool only implement [Client][client]. Client is intended for clients which
interact with only a single redis instance, while MultiClient is intended for
use by clients which encompass multiple redis instances, and makes the
distinction between primary and secondary instances.
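For illustration only, the distinction boils down to something like the following rough shapes. These are not the actual radix interfaces, which carry more methods than this, and Action's real signature differs as well:

```go
package radixsketch

import "context"

// Action is a stand-in for radix's Action type.
type Action interface {
	Perform(ctx context.Context) error
}

// singleClient is roughly the shape of Client: it only knows how to perform
// Actions against the one redis instance it wraps (a Conn or a Pool).
type singleClient interface {
	Do(ctx context.Context, a Action) error
}

// multiClient is roughly the shape of MultiClient: the same Do, plus the
// ability to direct Actions at secondary instances specifically, which is
// what Cluster and Sentinel need to expose.
type multiClient interface {
	Do(ctx context.Context, a Action) error
	DoSecondary(ctx context.Context, a Action) error
}
```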
In general, users will want to use MultiClient in their code and swap the
underlying implementation as their infrastructure evolves. When using only a
single Pool, one can make it into a MultiClient using the new
[ReplicaSet][replicaset].
One can also implement their own MultiClients fairly easily, to handle their
own custom sharding or failover systems. It's not a common use-case, but it's
cool that existing types like Scanner will still continue to work.
## Contexts
A common feature request of v3 was for support for Go's [Contexts][context],
which would allow callers to unblock blocked operations in a dynamic way. There
wasn't a clear way to incorporate Contexts into v3 without greatly expanding the
API (something the Go standard library has had to do), and so I saved them for
v4.
In v4 all operations which might potentially block accept a Context argument.
This takes the place of timeout options and some trace events which were used in
v3, and in general simplifies things for the user.
This was a change for which there is not much to talk about, but which required
a _lot_ of work internally. Go's Contexts do not play nicely with its networking
primitives, and making this all work alongside connection sharing and pipelining
is a really hairy puzzle (for which there's a few open bugs still). I may one
day write a blog post just about this topic, if I can figure out how to explain
it in a way which isn't completely mind-numbing.
## Configuration
Constructors in v3 took advantage of the [functional options pattern][opts] for
accepting optional parameters. While this pattern _looks_ nice, I've since
grown out of love with it. The implementation is a lot more complex, its
behavior is more ambiguous to users in certain cases (what happens if the same
option is passed in twice?), it makes documentation more complex, and a slice of
option functions isn't inspectable or serializable like a struct is.
v4 uses a config struct pattern, but in a different way than I've generally seen
it. See [Pool's constructor][pool] for an example. This pattern is functionally
the same as passing the config struct as an argument to the constructor, but I
think it results in a nicer grouping in the documentation.
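Roughly, the pattern looks like the following sketch. The field names and defaults here are made up for illustration; the real PoolConfig has its own set of options:

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// PoolConfig is a config struct; zero values mean "use the default".
type PoolConfig struct {
	Size         int
	PingInterval time.Duration
}

// Pool is a stand-in for the type being constructed.
type Pool struct{ cfg PoolConfig }

// New hangs the constructor off of the config struct itself, so the config
// fields and the constructor show up grouped together in the documentation.
func (cfg PoolConfig) New(ctx context.Context, addr string) (*Pool, error) {
	if cfg.Size == 0 {
		cfg.Size = 4
	}
	// ... dial addr, spin up cfg.Size connections, etc ...
	return &Pool{cfg: cfg}, nil
}

func main() {
	pool, err := PoolConfig{Size: 8}.New(context.Background(), "127.0.0.1:6379")
	fmt.Println(pool, err)
}
```

Since the config is a plain struct it can be inspected, logged, or populated from a serialized file, which covers most of the gripes with the functional options approach.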
## Smaller Changes
There's some smaller sets of changes which are worth mentioning. These didn't
result in huge, package-wide changes, but will be useful for users of specific
functionality.
### Action Properties
[v4's Action type][action] has a Properties method which returns a struct
containing various fields which are useful for clients performing the Action.
This is an improvement over v3's Action, which had no such method, in that it's
more extensible going forward. Those implementing their own custom Actions
should take care to understand the Action properties.
### PubSub
The v4 [PubSubConn][pubsub] has been completely redesigned from v3's
implementation. The old design tried to do too much, and resulted in weird
edge-cases when trying to tear down a connection that a user would have to
handle themselves. The new design is simple both in implementation and usage.
### Tracing
The v4 [trace][trace] sub-package has been extended to support tracing Sentinel
events, but at the same time has been cleaned out of all events which could be
otherwise inferred by using Context values or wrapping an interface like Conn,
Action, etc...
## What's Next
Obviously the most immediate goal is to get v4 stable and tagged. Once that's
done I'm sure there will be many small bugs, feature requests, etc... which come
up over time, and I'll do my best to address those as quickly as I can. I'm
very excited to start using v4 in my own day-to-day work like I currently do for
v3; it has a lot of great improvements and new flexibility that will make using
Go and redis together an even better experience than it already is.
That all said, I don't expect there to be a radix v5. I have a lot of other
projects I'd like to work on, and radix is a huge time-sink. As time goes on v4
will stabilize further and further, until all that's left is for it to gain
additional support for whatever new crazy features redis comes up with. My hope
is that the existing API is flexible enough to allow others to fill in those
gaps without any major changes to the existing code, and radix v4 can be the
final major radix version.
[redis]: https://redis.io
[v3]: https://pkg.go.dev/github.com/mediocregopher/radix/v3#section-documentation
[v4]: https://pkg.go.dev/github.com/mediocregopher/radix/v4#section-documentation
[nussjustin]: https://github.com/nussjustin
[resp3]: https://github.com/antirez/RESP3
[resp3pkg]: https://pkg.go.dev/github.com/mediocregopher/radix/v4/resp/resp3
[respOpts]: https://pkg.go.dev/github.com/mediocregopher/radix/v4/resp#Opts
[changelog]: https://github.com/mediocregopher/radix/blob/v4/CHANGELOG.md
[cmdaction]: https://pkg.go.dev/github.com/mediocregopher/radix/v3#CmdAction
[evalscript]: https://pkg.go.dev/github.com/mediocregopher/radix/v4#EvalScript
[conn]: https://pkg.go.dev/github.com/mediocregopher/radix/v4#Conn
[pool]: https://pkg.go.dev/github.com/mediocregopher/radix/v4#PoolConfig.New
[multiclient]: https://pkg.go.dev/github.com/mediocregopher/radix/v4#MultiClient
[client]: https://pkg.go.dev/github.com/mediocregopher/radix/v4#Client
[replicaset]: https://pkg.go.dev/github.com/mediocregopher/radix/v4#ReplicaSet
[context]: https://blog.golang.org/context
[opts]: https://dave.cheney.net/2014/10/17/functional-options-for-friendly-apis
[action]: https://pkg.go.dev/github.com/mediocregopher/radix/v4#Action
[pubsub]: https://pkg.go.dev/github.com/mediocregopher/radix/v4#PubSubConn
[trace]: https://pkg.go.dev/github.com/mediocregopher/radix/v4/trace

View File

@@ -1,209 +0,0 @@
---
title: >-
Self-Hosting a Blog Mailing List
description: >-
For fun and no profit.
tags: tech
---
As of this week the Mediocre Blog has a new follow mechanism: email! [Sign up
on the **Follow** page][follow] and you'll get an email every time a new post
is published to the blog. It's like RSS, except there's a slight chance you
might actually use it.
This post will detail my relatively simple setup for this, linking to points
within my blog's server code which are relevant. While I didn't deliberately
package my code up into a nice public package, if you have some cursory
knowledge of Go you could probably rip my code and make it work for you. Don't
worry, it has a [permissive license](/assets/wtfpl.txt).
[follow]: /follow.html
## Email Server
Self-hosting email is the hardest and most foreign part of this whole
thing for most devs. The long and the short of it is that it's very unlikely you
can do this without renting a VPS somewhere. Luckily there are VPSs out there
which are cheap and which allow SMTP traffic, so it's really just a matter of
biting the cost bullet and letting your definition of "self-hosted" be a bit
flexible. At least you still control the code!
I highly recommend [maddy][maddy] as an email server which has everything you
need out of the box, no docker requirements, and a flexible-yet-simple
configuration language. I've discussed [in previous posts][maddypost] the
general steps I've used to set up maddy on a remote VPS, and so I won't
re-iterate here. Just know that I have a VPS on my private [nebula][nebula] VPN,
with a maddy server listening for outgoing mail on port 587, with
username/password authentication on that port.
[maddy]: https://maddy.email
[maddypost]: {% post_url 2021-07-06-maddy-vps %}
[nebula]: https://github.com/slackhq/nebula
## General API Design
The rest of the system lies within the Go server which hosts my blog. There is
only a single instance of the server, and it runs in my living room. With these
as the baseline environmental requirements, the rest of the design follows
easily:
* The Go server provides [three REST API endpoints][restendpoints]:
- `POST /api/mailinglist/subscribe`: Accepts a POST form argument `email`, sends a
verification email to that email address.
- `POST /api/mailinglist/finalize`: Accepts a POST form argument `subToken`,
which is a random token sent to the user when they subscribe. Only by
finalizing their subscription can a user be considered actually
subscribed.
- `POST /api/mailinglist/unsubscribe`: Accepts a POST form argument
`unsubToken`, which is sent with each blog post notification to the user.
* The static frontend code has [two pages][staticpages] related to the mailing
list:
- `/mailinglist/finalize.html`: The verification email which is sent to the
user links to this page, with the `subToken` as a GET argument. This page
then submits the `subToken` to the `POST /api/mailinglist/finalize`
endpoint.
- `/mailinglist/unsubscribe.html`: Each blog post notification email sent to
users contains a link to this page, with an `unsubToken` as a GET
argument. This page then submits the `unsubToken` to the `POST
/api/mailinglist/unsubscribe` endpoint.
It's a pretty small API, but it covers all the important things, namely
verification (because I don't want people signed up against their will, nor do I
want to be sending emails to fake email addresses), and unsubscribing.
[restendpoints]: https://github.com/mediocregopher/blog.mediocregopher.com/blob/5ca7dadd02fb49dd62ad448d12021359e41beec1/srv/cmd/mediocre-blog/main.go#L169
[staticpages]: https://github.com/mediocregopher/blog.mediocregopher.com/tree/9c3ea8dd803d6f0df768e3ae37f8c4ab2efbcc5c/static/src/mailinglist
## Proof-of-work
It was important to me that someone couldn't just sit and submit random emails
to the `POST /api/mailinglist/subscribe` endpoint in a loop, causing my email
server to eventually get blacklisted. To prevent this I've implemented a simple
proof-of-work (PoW) system, whereby the client must first obtain a PoW
challenge, generate a solution for that challenge (which involves a lot of CPU
time), and then submit that solution as part of the subscribe endpoint call.
Both the [server-side][powserver] and [client-side][powclient] code can be found
in the blog's git repo. You could theoretically view the Go documentation for
the server code on pkg.go.dev, but apparently their bot doesn't like my WTFPL.
When providing a challenge to the client, the server sends back two values: the
seed and the target.
The target is simply a number whose purpose will become apparent in a second.
The seed is a byte-string which encodes:
* Some random bytes.
* An expiration timestamp.
* A target (matching the one returned to the client alongside the seed).
* An HMAC-MD5 which signs all of the above.
When the client submits a valid solution the server checks the HMAC to ensure
that the seed was generated by the server, it checks the expiration to make sure
the client didn't take too long to solve it, and it checks in an [internal
storage][powserverstore] that the seed hasn't already been solved. Because
the expiration is built into the seed the server doesn't have to store each
solved seed forever, only until the seed has expired.
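Here's a rough sketch of constructing and checking such a seed in Go. The exact field sizes and layout are my own guesses for illustration, not necessarily what the blog's pow package actually does:

```go
package pow

import (
	"crypto/hmac"
	"crypto/md5"
	"crypto/rand"
	"encoding/binary"
	"time"
)

const (
	randLen = 16
	seedLen = randLen + 8 + 4 + md5.Size // random + expiration + target + HMAC
)

// newSeed builds a seed: random bytes, an expiration timestamp, the target,
// and an HMAC-MD5 over all of the preceding bytes, keyed with a server-side
// secret.
func newSeed(secret []byte, target uint32, ttl time.Duration) ([]byte, error) {
	seed := make([]byte, randLen+8+4)
	if _, err := rand.Read(seed[:randLen]); err != nil {
		return nil, err
	}
	binary.BigEndian.PutUint64(seed[randLen:], uint64(time.Now().Add(ttl).Unix()))
	binary.BigEndian.PutUint32(seed[randLen+8:], target)

	h := hmac.New(md5.New, secret)
	h.Write(seed)
	return h.Sum(seed), nil // appends the HMAC onto the end of the seed
}

// checkSeed verifies the HMAC and the expiration of a seed handed back by a
// client. De-duplication of already-solved seeds is left to a separate store.
func checkSeed(secret, seed []byte) bool {
	if len(seed) != seedLen {
		return false
	}
	body, sig := seed[:seedLen-md5.Size], seed[seedLen-md5.Size:]

	h := hmac.New(md5.New, secret)
	h.Write(body)
	if !hmac.Equal(sig, h.Sum(nil)) {
		return false
	}

	expiresAt := int64(binary.BigEndian.Uint64(body[randLen : randLen+8]))
	return time.Now().Unix() <= expiresAt
}
```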
To generate a solution to the challenge the client does the following (a sketch of this loop in Go follows the list):
* Concatenate up to `len(seed)` random bytes onto the original seed given by the
server.
* Calculate the SHA512 of that.
* Parse the first 4 bytes of the resulting hash as a big-endian uint32.
* If that uint32 is less than the target then the random bytes generated in the
first step are a valid solution. Otherwise the client loops back to the first
step.
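Sketched in Go, that loop looks something like the following. The real client does this in the browser in javascript, and for simplicity this sketch always uses exactly `len(seed)` random bytes rather than "up to" that many:

```go
package pow

import (
	"crypto/rand"
	"crypto/sha512"
	"encoding/binary"
)

// solve brute-forces a solution: random bytes which, appended to the seed and
// hashed with SHA-512, give a hash whose first 4 bytes (read as a big-endian
// uint32) are less than the target.
func solve(seed []byte, target uint32) ([]byte, error) {
	buf := make([]byte, len(seed)*2)
	copy(buf, seed)
	sol := buf[len(seed):]

	for {
		if _, err := rand.Read(sol); err != nil {
			return nil, err
		}
		h := sha512.Sum512(buf)
		if binary.BigEndian.Uint32(h[:4]) < target {
			return append([]byte(nil), sol...), nil
		}
	}
}
```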
Finally, a new endpoint was added: `GET /api/pow/challenge`, which returns a PoW
seed and target for the client to solve. Since seeds don't require storage in a
database until _after_ they are solved there are essentially no consequences to
someone spamming this in a loop.
With all of that in place, the `POST /api/mailinglist/subscribe` endpoint
described before now also requires a `powSeed` and a `powSolution` argument. The
[Follow][follow] page, prior to submitting a subscribe request, first retrieves
a PoW challenge, generates a solution, and only _then_ will it submit the
subscribe request.
[powserver]: https://github.com/mediocregopher/blog.mediocregopher.com/blob/9c3ea8dd803d6f0df768e3ae37f8c4ab2efbcc5c/srv/pow/pow.go
[powserverstore]: https://github.com/mediocregopher/blog.mediocregopher.com/blob/5ca7dadd02fb49dd62ad448d12021359e41beec1/srv/pow/store.go
[powclient]: https://github.com/mediocregopher/blog.mediocregopher.com/blob/9c3ea8dd803d6f0df768e3ae37f8c4ab2efbcc5c/static/src/assets/solvePow.js
## Storage
Storage of emails is fairly straightforward: since I'm not running this server
on multiple hosts, I can just use [SQLite][sqlite]. My code for storage in
SQLite can all be found [here][sqlitecode].
My SQLite database has a single table:
```
CREATE TABLE emails (
id TEXT PRIMARY KEY,
email TEXT NOT NULL,
sub_token TEXT NOT NULL,
created_at INTEGER NOT NULL,
unsub_token TEXT,
verified_at INTEGER
)
```
It will probably one day need an index on `sub_token` and `unsub_token`, but I'm
not quite there yet.
The `id` field is generated by first lowercasing the email (because emails are
case-insensitive) and then hashing it. This way I can be sure to identify
duplicates easily. It's still possible for someone to do the `+` trick to get
their email in multiple times, but as long as they verify each one I don't
really care.
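As a sketch, assuming SHA-256 as the hash (the post's actual code may use something different), the id derivation is just:

```go
package mailinglist

import (
	"crypto/sha256"
	"encoding/hex"
	"strings"
)

// emailID derives the primary key for an email address: lowercase it (emails
// are case-insensitive) and hash the result, so duplicate addresses map to
// the same id.
func emailID(email string) string {
	sum := sha256.Sum256([]byte(strings.ToLower(email)))
	return hex.EncodeToString(sum[:])
}
```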
[sqlite]: https://sqlite.org/index.html
[sqlitecode]: https://github.com/mediocregopher/blog.mediocregopher.com/blob/5ca7dadd02fb49dd62ad448d12021359e41beec1/srv/mailinglist/store.go
## Publishing
Publishing is quite easy: my [MailingList interface][mailinglistinterface] has a
`Publish` method on it, which loops through all records in the SQLite table,
discards those which aren't verified, and sends an email to the rest containing:
* A pleasant greeting.
* The new post's title and URL.
* An unsubscribe link.
I will then use a command-line interface to call this `Publish` method. I
haven't actually made that interface yet, but no one is subscribed yet so it
doesn't matter.
[mailinglistinterface]: https://github.com/mediocregopher/blog.mediocregopher.com/blob/5ca7dadd02fb49dd62ad448d12021359e41beec1/srv/mailinglist/mailinglist.go#L23
## Easy-Peasy
The hardest part of the whole thing was probably getting maddy set up, with a
close second being trying to decode a hex string to a byte string in javascript
(I tried Crypto-JS, but it wasn't working without dragging in webpack or a bunch
of other nonsense, and vanilla JS doesn't have any way to do it!).
Hopefully reading this will make you consider self-hosting your own blog's
mailing list as well. If we let these big companies keep taking over all
internet functionality then eventually they'll finagle the standards so that
no one can self-host anything, and we'll have to start all over.
And really, do you _need_ tracking code on the emails you send out for your
recipe blog? Just let your users ignore you in peace and quiet.

View File

@@ -1,480 +0,0 @@
---
title: >-
The Syntax of Ginger
description: >-
Oh man, this got real fun real quick.
series: ginger
tags: tech
---
Finally I have a syntax for ginger that I'm happy with. This has actually been a
huge roadblock for me up till this point. There's a bit of a chicken-and-the-egg
problem with the syntax: without pinning down the structures underlying the
syntax it's difficult to develop one, but without an idea of syntax it's
difficult to know what structures will be ergonomic to use.
I've been focusing on the structures so far, and have only now pinned down the
syntax. Let's see what it looks like.
## Preface: Conditionals
I've so far written [two][cond1] [posts][cond2] regarding conditionals in
ginger. After more reflection, I think I'm going to stick with my _original_
gut, which was to only have value and tuple vertices (no forks), and to use a
function which accepts both a boolean and two input edges: the first being the
one to take if the boolean is true, and the second being the one to take if it's
false.
Aka, the very first proposal in the [first post][cond1]. It's hard to justify
up-front, but I think once you see it in action with a clean syntax you'll agree
it just kind of works.
[cond1]: {% post_url 2021-03-01-conditionals-in-ginger %}
[cond2]: {% post_url 2021-03-04-conditionals-in-ginger-errata %}
## Designing a Syntax
Ginger is a bit of a strange language. It uses strange datastructures in strange
ways. But approaching the building of a syntax for any language is actually
straightforward: you're designing a serialization protocol.
To pull back a bit, consider a list of words. How would you encode this list in
order to write it to a file? To answer this, let's flip the question: how would
you design a sequence of characters (ie the contents of the file) such that the
reader could reconstruct the list?
Well, constructing the list from a sequence of characters requires being able to
construct it _at all_, so in what ways is the list constructed? For this list,
let's say there's only an append operation, which accepts a list and a value to
append to it, and returns the result.
If we say that append is encoded via wrapping parenthesis around its two
arguments, and that `()` encodes the empty list, then we get a syntax like...
```
(((() foo) bar) baz)
```
...which, in this instance, decodes to a list containing the words "foo", "bar",
and "baz", in that order.
It's not a pretty syntax, but it demonstrates the method. If you know how the
datastructure is constructed via code, you know what capabilities the syntax must
have and how it needs to fit together.
## gg
All of this amounted to me needing to implement the ginger graph in some other
language, in order to see what features the syntax must have.
A few years ago I had begun an implementation of a graph datastructure in go, to
use as the base (or at least a reference) for ginger. I had called this
implementation `gg` (ginger graph), with the intention that this would also be
the file extension used to hold ginger code (how clever).
The basic qualities I wanted in a graph datastructure for ginger were, and still
are:
* Immutability, ie all operations which modify the structure should return a
copy, leaving the original intact.
* Support for tuples.
* The property that it should be impossible to construct an invalid graph. An
invalid graph might be, for example, one where there is a single node with no
edges.
* Well tested, and reasonably performant.
Coming back to all this after a few years I had expected to have a graph
datastructure implemented, possibly with immutability, but lacking in tuples and
tests. As it turns out I completely underestimated my past self, because as far
as I can tell I had already finished the damn thing, tuples, tests and all.
It looks like that's the point where I stopped, probably for being unsure about
some other aspect of the language, and my motivation fell off. The fact that
I've come back to ginger, after all these years, and essentially rederived the
same language all over again, gives me a lot of confidence that I'm on the right
track (and a lot of respect for my past self for having done all this work!).
The basic API I came up with for building ginger graphs (ggs) looks like this:
```go
package gg
// OpenEdge represents an edge with a source value but no destination value,
// with an optional value on it. On its own an OpenEdge has no meaning, but is
// used as a building block for making Graphs.
type OpenEdge struct{ ... }
// TupleOut constructs an OpenEdge leading from a tuple, which is comprised of
// the given OpenEdges leading into it, with an optional edge value.
func TupleOut(ins []OpenEdge, edgeVal Value) OpenEdge
// ValueOut constructs an OpenEdge leading from a non-tuple value, with an
// optional edge value.
func ValueOut(val, edgeVal Value) OpenEdge
// ZeroGraph is an empty Graph, from which all Graphs are constructed via calls
// to AddValueIn.
var ZeroGraph = &Graph{ ... }
// Graph is an immutable graph structure, formed from a collection of edges
// between values and tuples.
type Graph struct{ ... }
// AddValueIn returns a new Graph which is a copy of the original, with the
// addition of a new edge. The new edge's source and edge value come from the
// given OpenEdge, and the edge's destination value is the given value.
func (g *Graph) AddValueIn(oe OpenEdge, val Value) *Graph
```
The actual API is larger than this, and includes methods to remove edges,
iterate over edges and values, and perform unions and disjoins of ggs. But the
above are the elements which are required only for _making_ ggs, which is all
that a syntax is concerned with.
As a demonstration, here is how one would construct the `min` operation, which
takes two numbers and returns the smaller, using the `gg` package:
```go
// a, b, in, out, if, etc.. are Values which represent the respective symbol.
// a is the result of passing in to the 0 operation, ie a is the 0th element of
// the in tuple.
min := gg.ZeroGraph.AddValueIn(gg.ValueOut(in, 0), a)
// b is the 1st element of the in tuple
min = min.AddValueIn(gg.ValueOut(in, 1), b)
// out is the result of an if which compares a and b together, and which returns
// the lesser.
min = min.AddValueIn(gg.TupleOut([]gg.OpenEdge{
gg.TupleOut([]gg.OpenEdge{a, b}, lt),
a,
b,
}, if), out)
```
And here's a demonstration of how this `min` would be used:
```go
// out is the result of passing 1 and 5 to the min operation.
gg.ZeroGraph.AddValueIn(gg.TupleOut([]gg.OpenEdge{1, 5}, min), out)
```
## Make it Nice
_Technically_ we're done. We have an implementation of the language's underlying
structure, and a syntax which encodes it (ie the ugly ass go syntax above). But
obviously I'm not proposing anyone actually use that.
Another thing I found when digging around in the old ginger repo was a text
file, tucked away in a directory called "sandbox", which had a primitive syntax
which _almost_ worked. I won't copy it here, but you can find it if you care to.
But with that as a foundation I came up with a crude, rough draft spec, which
maps the go syntax to the new syntax.
```
ValueOut(val, edgeVal) : -edgeVal-val
ValueOut(val, null) : -val
TupleOut([]val, edgeVal) : -edgeVal-(val, ...)
TupleOut([]val, null) : -(val, ...)
Graph(openEdge->val, ...) : { val openEdge, ... }
```
A couple things to note about this spec:
* `null` is used to indicate absence of value on an edge. The details of `null`
are yet to be worked out, but we can use this placeholder for now.
* `Graph` is cheating a bit. In the original `gg` implementation a Graph gains
its OpenEdge/Value pairs via successive calls to `AddValueIn`. However, such a
pattern doesn't translate well to text, and since we're dealing purely with
constructing an entire Graph at once we can instead have our Graph syntax
declare all OpenEdge/Value pairs at once.
* It's backwards! Eg where the go syntax does `ValueOut(val, edgeVal)`, the
proposed spec puts the values in the opposite order: `-edgeVal-val`. The
former results in code which is read from input to output, while the latter
results in the opposite: output to input.
This was a tip I picked up from the old text file I found, and the result is
code which is more familiar to an existing programmer. I _think_ (but am
not sure) that it's also more in line with how programming is done mentally,
ie we start with a result and work backwards to figure out what it takes to
get there.
It's possible, though, that I'm wrong, so at this end of this post I'm going
to put some examples of the same code both "forwards" and "backwards" and see
how I feel about it.
With all that said, let's see it in action! Here's `min` implemented in our shiny new syntax:
```
min -{
a -0-in,
b -1-in,
out -if-(
-lt-(-a,-b),
-a,
-b
)
}
```
and then here's it being used:
```
out -min-(-1,-5)
```
## Make it _Nicer_
The most striking feature of this rough draft spec is all the prefix dashes,
such as in the `-min-(-1,-5)` statement. These dashes were included as they make
sense in the context of what the intended human interpretation of the structure
is: two values, `1`, and `5`, are being _piped into_ the two slots of a 2-tuple,
and that 2-tuple is being _piped into_ the `min` operation, the output of which
is being _piped into_ `out`.
The "piping into" is what the dash represents, which is why the top level values
in the graph, `a`, `b`, and `out`, don't have a preceding dash; they are the
ultimate destinations of the pipes leading to them. But these pipes are
ultimately ugly, and also introduce odd questions like "how do we represent
-1?", so they need to go.
So I've made a second draft, which is only a few changes away from the rough,
but oh man do those changes make a world of difference. Here's the cleaned up
spec:
```
ValueOut(val, edgeVal) : edgeVal(val)
ValueOut(val, null) : val
TupleOut([]val, edgeVal) : edgeVal(val, ...)
TupleOut([]val, null) : (val, ...)
Graph(openEdge->val, ...) : { val = openEdge, ... }
```
The dashes were simply removed, and the `edgeVal` and `val` concatted together.
For `ValueOut(val, edgeVal)` wrapping parenthesis were put around `val`, to
delineate it and `edgeVal`. This conflicts with the syntax for `TupleOut([]val,
edgeVal)`, but that conflict is easy to remedy: when parenthesis wrap only a
single `val` then that is a `ValueOut`, otherwise it's a `TupleOut`.
Another change is to add an `=` between the `val` and `openEdge` in the `Graph`
constructor. This is a purely aesthetic change, but as you'll see it works well.
So let's see it! `min` implemented with this cleaned up syntax:
```
min = {
a = 0(in),
b = 1(in),
out = if(
lt(a,b),
a,
b
)
}
```
And then its use:
```
min(1,5)
```
Well well well, look what we have here: a conventional programming language
syntax! `{`/`}` wrap a scope, and `(`/`)` wrap function arguments and
(optionally) single values. It's a lot clearer now that `0` and `1` are being
used as operations themselves when instantiating `a` and `b`, and `if` is much
more readable.
I was extremely surprised at how well this actually worked out. Despite having
drastically different underpinnings than most languages it ends up looking both
familiar and obvious. How cool!
## Examples Examples Examples
Here's a collection of example programs written in this new syntax. The base
structure of these are borrowed from previous posts, I'm merely translating that
structure into a new form:
```
// decr outputs one less than the input.
decr = { out = add(in, -1) }
// fib accepts a number i, and outputs the ith fibonacci number.
fib = {
inner = {
n = 0(in),
a = 1(in),
b = 2(in),
out = if(zero?(n),
a,
inner(decr(n), b, add(a,b))
)
},
out = inner(in, 0, 1)
}
// map accepts a sequence and a function, and returns a sequence consisting of
// the result of applying the function to each of the elements in the given
// sequence.
map = {
inner = {
mapped-seq = 0(in),
orig-seq = 1(in),
op = 2(in),
i = len(mapped-seq),
// graphs provide an inherent laziness to the language. Just because
// next-el is _defined_ here doesn't mean it's evaluated here at runtime.
// In reality it will only be evaluated if/when evaluating out requires
// evaluating next-el.
next-el = op(i(orig-seq)),
next-mapped-seq = append(mapped-seq, next-el),
out = if(
eq(len(mapped-seq), len(orig-seq)),
mapped-seq,
inner(next-mapped-seq, orig-seq, op)
)
}
// zero-seq returns an empty sequence
out = inner(zero-seq(), 0(in), 1(in))
}
```
## Selpmexa Selpmexa Selpmexa
Our syntax encodes a graph, and a graph doesn't really care if the syntax was
encoded in an input-to-output vs an output-to-input direction. So, as promised,
here's all the above examples, but "backwards":
```
// min returns the lesser of the two numbers it is given
{
(in)0 = a,
(in)1 = b,
(
(a,b)lt,
a,
b
)if = out
} = min
// decr outputs one less than the input.
{ (in, -1)add = out } = decr
// fib accepts a number i, and outputs the ith fibonacci number.
{
{
(in)0 = n,
(in)1 = a,
(in)2 = b,
(
(n)zero?,
a,
((n)decr, b, (a,b)add)inner
)if = out
} = inner,
(in, 0, 1)inner = out
} = fib
// map accepts a sequence and a function, and returns a sequence consisting of
// the result of applying the function to each of the elements in the given
// sequence.
{
{
(in)0 = mapped-seq,
(in)1 = orig-seq,
(in)2 = op,
(mapped-seq)len = i,
((orig-seq)i)op = next-el,
(mapped-seq, next-el)append = next-mapped-seq,
(
((mapped-seq)len, (orig-seq)len)eq,
mapped-seq,
(next-mapped-seq, orig-seq, op)inner
)if = out
} = inner,
(()zero-seq, (in)0, (in)1)inner = out
} = map
```
Do these make you itchy? They kind of make me itchy. But... parts of them also
appeal to me.
The obvious reason why these feel wrong to me is the placement of `if`:
```
(
(a,b)lt,
a,
b
)if = out
```
The tuple which is being passed to `if` here is confusing unless you already
know that it's going to be passed to `if`. But on your first readthrough you
won't know that till you get to the end, so you'll be in the dark until then.
For more complex programs I'm sure this problem will compound.
On the other hand, pretty much everything else looks _better_, imo. For example:
```
// copied and slightly modified from the original to make it even more complex
(mapped-seq, ((orig-seq)i)op)append = next-mapped-seq
```
Something like this reads very clearly to me, and requires a lot less mental
backtracking to comprehend. The main difficulty I have is tracking the
parenthesis, but the overall "flow" of data and the order of events is plain to
read.
## Next Steps
The syntax here is not done yet, not by a long shot. If my record with past
posts about ginger (wherein I've "decided" on something and then completely
backtracked in later posts every single time) is any indication then this syntax
won't even look remotely familiar in a very short while. But it's a great
starting point, I think, and raises a lot of good questions.
* Can I make parenthesis chains, a la the last example, more palatable in some
way?
* Should I go with the "backwards" syntax after all? In a functional style of
programming `if` statements _should_ be in the minority, and so the syntax
which better represents the flow of data in that style might be the way.
* Destructuring of tuples seems to be wanted, as evidenced by all the `a =
0(in)` lines. Should this be reflected in the structure or solely be
syntactical sugar?
* Should the commas be replaced with any whitespace (and make commas count as
whitespace, as clojure has done)? If this is possible then I think they should
be, but I won't know for sure until I begin implementing the parser.
And, surely, many more! I've felt a bit lost with ginger for a _long_ time, but
seeing a real, usable syntax emerge has really invigorated me, and I'll be
tackling it again in earnest soon (fingers crossed).

View File

@@ -1,266 +0,0 @@
---
title: >-
Building AppImages with Nix
description: >-
With some process trees thrown in there for fun.
series: nebula
tags: tech
---
It's been a bit since I've written an update on the cryptic nebula project,
almost 5 months (since [this post][lastnix], which wasn't officially part of the
blog series but whatever). Since then the project has switched names to
"cryptic-net", and we've decided that we will likely use [MinIO](https://min.io/)
as our network storage service, but neither of those is the most interesting update.
The project had been stalled because of a lack of a build system which could
fulfill the following requirements:
* Network configuration (static IP, VPN certificates) of individual hosts is
baked into the binary they run.
* Binaries are completely static; no external dependencies need to exist on the
host in order to run them.
* Each binary runs a composition of multiple sub-services, each being a separate
sub-process, and all of them having been configured to work together (with
some possible glue code on our side) to provide the features we want.
* The builder itself should be deterministic; no matter where it runs it should
produce the same binary given the same input parameters.
Lacking such a build system we're not able to distribute cryptic-net in a way
which "just works"; it would require some kind of configuration, or some kind of
runtime environment to be set up, both of which would be a pain for users. And
lacking a definite build system makes it difficult to move forward on any other
aspect of a project, as it's not clear what may need to be redone in the future
when the build system is decided upon.
## Why not nix-bundle?
My usage of [nix-bundle][nix-bundle] in a [previous post][lastnix] was an
attempt at fulfilling these requirements. Nix in general does very well in
fulfilling all but the second requirement, and nix-bundle was supposed to
fulfill even that by packaging a nix derivation into a static binary.
And all of this it did! Except that the mechanism of nix-bundle is a bit odd.
The process of a nix-bundle'd binary jails itself within a chroot, which it then
uses to fake the `/nix/store` path which nix built binaries expect to exist.
This might work in a lot of cases, but it did not work in ours. For one, [nebula
can't create its network interface when run from inside
nix-bundle's chroot][nix-bundle-issue]. For another, being run in a chroot means
there are going to be strange restrictions on what our binary is able to do and
not do.
## AppImage
What we really needed was an [AppImage][appimage]. AppImages are static binaries
which can bundle complex applications, even those which don't expect to be
bundled into single binaries. In this way the end result is the same as
nix-bundle, but the mechanism AppImage uses is different and places far fewer
restrictions on what we can and can't do with our program.
## Building Sub-Services Statically with Nix
It's probably possible to use nix to generate an AppImage which has the
`/nix/store` built into it, similar to what nix-bundle does, and therefore not
worry about whether the binaries it's bundling are static or not. But if your
services are written in sane languages it's not that difficult to build them
statically and dodge the issue.
For example, here is how you build a go binary statically:
```
{
buildGoModule,
fetchFromGitHub,
}:
buildGoModule rec {
pname = "nebula";
version = "1.4.0";
src = fetchFromGitHub {
owner = "slackhq";
repo = pname;
rev = "v${version}";
sha256 = "lu2/rSB9cFD7VUiK+niuqCX9CI2x+k4Pi+U5yksETSU=";
};
vendorSha256 = "p1inJ9+NAb2d81cn+y+ofhxFz9ObUiLgj+9cACa6Jqg=";
doCheck = false;
subPackages = [ "cmd/nebula" "cmd/nebula-cert" ];
CGO_ENABLED=0;
tags = [ "netgo" ];
ldflags = [
"-X main.Build=${version}"
"-w"
"-extldflags=-static"
];
};
```
And here's how to statically build a C binary:
```
{
stdenv,
glibcStatic, # e.g. pkgs.glibc.static
}:
stdenv.mkDerivation rec {
pname = "dnsmasq";
version = "2.85";
src = builtins.fetchurl {
url = "https://www.thekelleys.org.uk/dnsmasq/${pname}-${version}.tar.xz";
sha256 = "sha256-rZjTgD32h+W5OAgPPSXGKP5ByHh1LQP7xhmXh/7jEvo=";
};
nativeBuildInputs = [ glibcStatic ];
makeFlags = [
"LDFLAGS=-static"
"DESTDIR="
"BINDIR=$(out)/bin"
"MANDIR=$(out)/man"
"LOCALEDIR=$(out)/share/locale"
];
};
```
The derivations created by either of these expressions can be plugged right into
the `pkgs.buildEnv` used to create the AppDir (see AppDir section below).
## Process Manager
An important piece of the puzzle for getting cryptic-net into an AppImage was a
process manager. We need something which can run multiple service processes
simultaneously, restart processes which exit unexpectedly, gracefully handle
shutting down all those processes, and coalesce the logs of all processes into a
single stream.
There are quite a few process managers out there which could fit the bill, but
finding any which could be statically compiled ended up not being an easy task.
In the end I decided to see how long it would take me to implement such a
program in go, and hope it would be less time than it would take to get
`circus`, a python program, bundled into the AppImage.
2 hours later, [pmux][pmux] was born! Check it out. It's a go program so
building it looks pretty similar to the nebula builder above, so I won't repeat
it. However I will show the configuration we're using for it within the
AppImage, to show how it ties all the processes together:
```yaml
processes:
- name: nebula
cmd: bin/nebula
args:
- "-config"
- etc/nebula/nebula.yml
- name: dnsmasq
cmd: bin/dnsmasq
args:
- "-d"
- "-C"
- ${dnsmasq}/etc/dnsmasq/dnsmasq.conf
```
## AppDir -> AppImage
Generating an AppImage requires an AppDir. An AppDir is a directory which
contains all files required by a program, rooted to the AppDir. For example, if
the program expects a file to be at `/etc/some/conf`, then that file should be
placed in the AppDir at `<AppDir-path>/etc/some/conf`.
[These docs](https://docs.appimage.org/packaging-guide/manual.html#ref-manual)
were very helpful for me in figuring out how to construct the AppDir. I then
used the `pkgs.buildEnv` utility to create an AppDir derivation containing
everything cryptic-net needs to run:
```
appDir = pkgs.buildEnv {
name = "cryptic-net-AppDir";
paths = [
# real directory containing non-built files, e.g. the pmux config
./AppDir
# static binary derivations shown previously
nebula
dnsmasq
pmux
];
};
```
Once the AppDir is built one needs to use `appimagetool` to turn it into an
AppImage. There is an `appimagetool` build in the standard nixpkgs, but
unfortunately it doesn't seem to actually work...
Luckily nix-bundle is working on AppImage support, and includes a custom build
of `appimagetool` which does work!
```
{
fetchFromGitHub,
callPackage,
}: let
src = fetchFromGitHub {
owner = "matthewbauer";
repo = "nix-bundle";
rev = "223f4ffc4179aa318c34dc873a08cb00090db829";
sha256 = "0pqpx9vnjk9h24h9qlv4la76lh5ykljch6g487b26r1r2s9zg7kh";
};
in
callPackage "${src}/appimagetool.nix" {}
```
Using `callPackage` on this expression will give you a functional `appimagetool`
derivation. From there it's a simple matter of writing a derivation which
generates the AppImage from a created AppDir:
```
{
stdenv,
appDir,
appimagetool,
}:
stdenv.mkDerivation {
name = "cryptic-net-AppImage";
src = appDir;
buildInputs = [ appimagetool ];
ARCH = "x86_64"; # required by appimagetool
builder = builtins.toFile "build.sh" ''
source $stdenv/setup
cp -rL "$src" buildAppDir
chmod +w buildAppDir -R
mkdir $out
appimagetool buildAppDir "$out/cryptic-net-bin"
'';
}
```
Running that derivation deterministically spits out a binary at
`result/cryptic-net-bin` which can be executed and run immediately, on any
system using the `x86_64` CPU architecture.
## Fin
I'm extremely hyped to now have the ability to generate binaries for cryptic-net
that people can _just run_, without them worrying about which sub-services that
binary is running under-the-hood. From a usability perspective it's way nicer
than having to tell people to "install docker" or "install nix", and from a dev
perspective we have a really solid foundation on which to build a quite complex
application.
[lastnix]: {% post_url 2021-04-22-composing-processes-into-a-static-binary-with-nix %}
[nix-bundle]: https://github.com/matthewbauer/nix-bundle
[nix-bundle-issue]: https://github.com/matthewbauer/nix-bundle/issues/78
[appimage]: https://appimage.org/
[pmux]: https://github.com/cryptic-io/pmux

View File

@@ -1,28 +0,0 @@
---
title: >-
DOG! MONEY!
description: >-
A collection of original NFT artworks.
tags: art crypto
---
<div style="width: 100%; text-align: center; margin: 5rem 0 5rem 0;">
<img src="/assets/dog-money.png" alt="DOG MONEY!" />
</div>
Officially presenting a collection of original artworks, made by me! This was an
ambitious project I set myself out on this weekend, but I'm really happy to say
that I seem to be completing it on time.
You can check out the collection's OpenSea page **[HERE][coll]**.
Each piece is a tesselating image in SVG format. While it's pretty easy to just
churn out different combinations of images like this, I decided to keep the
collection small for... artistic integrity? Lack of time? Artificial scarcity??
Who knows how these things are decided.
It was a fun project, to say the least, and even if nothing comes of it
financially it's nice to have an outlet/portfolio for my art (which usually just
sits in a notebook or git repo somewhere.)
[coll]: https://opensea.io/collection/dog-money-exclamation-point

View File

@@ -1,266 +0,0 @@
---
title: >-
Managing a Home Server With Nix
description: >-
Docker is for boomers.
tags: tech
---
My home server has a lot running on it. Some of it I've written about in this
blog previously, some of it I haven't. It's hosting this blog itself, even!
With all of these services comes management overhead, both in terms of managing
packages and configuration. I'm pretty strict about tracking packages and
configuration in version control, and backing up all state I care about in B2,
such that if, _at any moment_, the server is abducted by aliens, I won't have
lost much.
## Docker
Previously I accomplished this with docker. Each service ran in a container
under the docker daemon, with configuration files and state directories shared
in via volume shares. Configuration files could then be stored in a git repo,
and my `docker run` commands were documented in `Makefile`s, because that was
easy.
This approach had drawbacks, notably:
* Docker networking is a pain. To be fair I should have just used
`--network=host` and dodged the issue, but I didn't.
* Docker images aren't actually deterministically built, so if I were to ever
have to rebuild any of the images I was using, I couldn't be sure I'd end up
with the same code as before. For some services this is actually a nagging
security concern in the back of my head.
* File permissions with docker volumes are fucked.
* Who knows how long the current version of docker will support the old ass
images and configuration system I'm using now. Probably not the next 10 years.
And what if dockerhub goes away, or changes its pricing model?
* As previously noted, docker is for boomers.
## Nix
Nix is the new hotness, and it solves all of the above problems quite nicely.
I'm not going to get into too much detail about how nix works here (honestly I'm
not very good at explaining it), but suffice to say I'm switching everything
over, and this post is about how that actually looks in a practical sense.
For the most part I eschew things like [flakes][flakes],
[home-manager][home-manager], and any other frameworks built on nix. While the
framework of the day may come and go, the base nix language should remain
constant.
As before with docker, I have a single git repo being stored privately in a way
I'm confident is secure (which is necessary because it contains some secrets).
At the root of the repo there exists a `pkgs.nix` file, which looks like this:
```
{
src ? builtins.fetchTarball {
name = "nixpkgs-d50923ab2d308a1ddb21594ba6ae064cab65d8ae";
url = "https://github.com/NixOS/nixpkgs/archive/d50923ab2d308a1ddb21594ba6ae064cab65d8ae.tar.gz";
sha256 = "1k7xpymhzb4hilv6a1jp2lsxgc4yiqclh944m8sxyhriv9p2yhpv";
},
}: (import src) {}
```
This file exists to provide a pinned version of `nixpkgs` which will get used
for all services. As long as I don't change this file the tools available to me
for building my services will remain constant forever, no matter what else
happens in the nix ecosystem.
Each directory in the repo corresponds to a service I run. I'll focus on a
particular service, [navidrome][navidrome], for now:
```bash
:: ls -1 navidrome
Makefile
default.nix
navidrome.toml
```
Not much to it!
### default.nix
The first file to look at is the `default.nix`, as that contains
all the logic. The overall file looks like this:
```
let
pkgs = (import ../pkgs.nix) {};
in rec {
entrypoint = ...;
service = ...;
install = ...;
}
```
The file describes an attribute set with three attributes, `entrypoint`,
`service`, and `install`. These form the basic pattern I use for all my
services; pretty much every service I manage has a `default.nix` which has
attributes corresponding to these.
#### Entrypoint
The first `entrypoint`, looks like this:
```
entrypoint = pkgs.writeScript "mediocregopher-navidrome" ''
#!${pkgs.bash}/bin/bash
exec ${pkgs.navidrome}/bin/navidrome --configfile ${./navidrome.toml}
'';
```
The goal here is to provide an executable which can be run directly, and which
will put together all necessary environment and configuration (`navidrome.toml`,
in this case) needed to run the service. Having the entrypoint split out into
its own target, as opposed to inlining it into the service file (defined next),
is convenient for testing; it allows you to test _exactly_ what's going to happen
when running the service normally.
#### Service
`service` looks like this:
```
service = pkgs.writeText "mediocregopher-navidrome-service" ''
[Unit]
Description=mediocregopher navidrome
Requires=network.target
After=network.target
[Service]
Restart=always
RestartSec=1s
User=mediocregopher
Group=mediocregopher
LimitNOFILE=10000
# The important part!
ExecStart=${entrypoint}
# EXTRA DIRECTIVES ELIDED, SEE
# https://www.navidrome.org/docs/installation/pre-built-binaries/
[Install]
WantedBy=multi-user.target
'';
```
Its function is to produce a systemd service file. The service file will
reference the `entrypoint` which has already been defined, and in general does
nothing else.
#### Install
`install` looks like this:
```
install = pkgs.writeScript "mediocregopher-navidrome-install" ''
#!${pkgs.bash}/bin/bash
sudo cp ${service} /etc/systemd/system/mediocregopher-navidrome.service
sudo systemctl daemon-reload
sudo systemctl enable mediocregopher-navidrome
sudo systemctl restart mediocregopher-navidrome
'';
```
This attribute produces a script which will install a systemd service on the
system it's run on. Assuming this is done in the context of a functional nix
environment and standard systemd installation it will "just work"; all relevant
binaries, configuration, etc., will come along for the ride, and the service
will be running _exactly_ what's defined in my repo, every time. Eat your heart
out, ansible!
Nix is usually used for building things, not _doing_ things, so it may seem
unusual for this to be here. But there's a very good reason for it, which I'll
get to soon.
### Makefile
While `default.nix` _could_ exist alone, and I _could_ just interact with it
directly using `nix-build` commands, I don't like to do that. Most of the reason
is that I don't want to have to _remember_ the `nix-build` commands I need. So
in each directory there's a `Makefile`, which acts as a kind of index of useful
commands. The one for navidrome looks like this:
```
install:
$$(nix-build -A install --no-out-link)
```
Yup, that's it. It builds the `install` attribute, and runs the resulting script
inline. Easy peasy. Other services might have some other targets, like `init`,
which operate the same way but with different script targets.
## Nix Remotely
If you were waiting for me to explain _why_ the install target is in
`default.nix`, rather than just being in the `Makefile` (which would also make
sense), this is the part where I do that.
My home server isn't the only place where I host services, I also have a remote
host which runs some services. These services are defined in this same repo, in
essentially the same way as my local services. The only difference is in the
`Makefile`. Let's look at an example from my `maddy/Makefile`:
```
install-vultr:
nix-build -A install --arg paramsFile ./vultr.nix
nix-copy-closure -s ${VULTR} $$(readlink result)
ssh -tt -q ${VULTR} $$(readlink result)
```
Vultr is the hosting company I'm renting the server from. Apparently I think I
will only ever have one host with them, because I just call it "vultr".
I'll go through this one line at a time. The first line is essentially the same
as the `install` line from my `navidrome` configuration, but with two small
differences: it takes in a parameters file containing the configuration
specific to the vultr host, and it's only _building_ the install script, not
running it.
The second line is the cool part. My remote host has a working nix environment
already, so I can just use `nix-copy-closure` to copy the `install` script to
it. Since the `install` script references the service file, which in turn
references the `entrypoint`, which in turn references the service binary itself,
and all of its configuration, _all_ of it will get synced to the remote host as
part of the `nix-copy-closure` command.
The third line runs the install script remotely. Since `nix-copy-closure`
already copied over all possible dependencies of the service, the end result is
a systemd service running _exactly_ as it would have if I were running it
locally.
All of this said, it's clear that provisioning this remote host in the first
place was pretty simple:
* Add my ssh key (done automatically by Vultr).
* Add my user to sudoers (done automatically by Vultr).
* Install single-user nix (two bash commands from
[here](https://nixos.wiki/wiki/Nix_Installation_Guide#Stable_Nix)).
And that's literally it. No docker, no terraform, no kubernubernetes, no yaml
files... it all "just works". Will it ever require manual intervention? Yeah,
probably... I haven't defined uninstall or stop targets, for instance (though
that would be trivial to do). But overall, for a use-case like mine where I
don't need a lot, I'm quite happy.
That's pretty much the post. Hosting services at home isn't very difficult to
begin with, and with this pattern those of us who use nix can do so with greater
reliability and confidence going forward.
[flakes]: https://nixos.wiki/wiki/Flakes
[home-manager]: https://github.com/nix-community/home-manager
[navidrome]: https://github.com/navidrome/navidrome

View File

@@ -1,86 +0,0 @@
---
title: >-
Minting A Single NFT
description: >-
Harder than I'd thought it'd be.
tags: tech art crypto
---
In a [previous post][prev] I made a page to sell some NFTs I had designed. I say
"designed", not "made", because the NFTs don't actually exist yet.
On [OpenSea](https://opensea.io), where those NFTs are listed, the NFT isn't
actually "minted" (created) until first sale. This is primarily done to save the
artist the cost of minting an NFT which no one else is going to buy. There might
be a way to mint outside of a sale on OpenSea, but I haven't dug too much into
it because it doesn't matter.
It doesn't matter because a primary goal here is to not go broke. And OpenSea is
primarily on Ethereum, a blockchain that can't actually be used by normal people
because of the crazy fees. There are some L2s for it, but I don't have any set
up, and keeping an NFT in an L2 feels like borrowed time.
So, as an initial test, I've printed an NFT on Solana, using
[Holaplex][hola]. Solana because it's cheap and fast and
wonderful, and Holaplex because... a lot of reasons.
The main one is that other projects, like [SolSea](https://solsea.io/) and
[AlphaArt](https://www.alpha.art/), require a sign-up just to print NFTs. And
not a crypto signup, where you just connect a wallet. But like a real one, with
an email. [Solanart](https://solanart.io/) requires you to contact them
privately through discord to mint on them!
Why? NFTs are a weird market. A lot of these platforms appear to the
~~customer~~ user more like casino games than anything, where the object is to
find the shiny thing which is going to get popular for one whimsical reason or
another. The artists get paid, the platform takes a cut, and whoever minted the
NFT prays.
For reasons involving the word "rug", the artist, the one who is attaching their
work to an NFT, is not necessarily to be trusted. So there's a lot of mechanisms
within the Solana NFT world to build trust between the audience and the artist.
Things like chain-enforced fair auctions (open to everyone at the same time) and
gatekeeping measures are examples.
Which is all well and good, but I still couldn't mint an NFT.
## Metaplex
So I tried another tack: self-hosting. It's like, my favorite thing? I talk
about it a lot.
I attempted to get [Metaplex][meta] set up locally. Metaplex is an organization,
associated with Solana Labs in some way I think, that's helped develop the NFT
standard on Solana. And they also develop an open-source toolkit for hosting
your own NFT store, complete with NFT minting with no fees or other road blocks.
Sounds perfect!
Except that I'm not a capable enough javascript developer to get it running. I
got as far as running the Next server and loading the app in my browser, but
a second into running it, it spits out some error in the console and nothing works
after that. I've spent too much time on it already, I won't go into it more.
So metaplex, for now, is out.
## Holaplex
Until I, somehow, (how actually though...?), found [Holaplex][hola]. It's
a very thinly skinned hosted Metaplex, with a really smooth signup process which
doesn't involve any emails. Each user gets a storefront under their own
subdomain for whatever NFTs they want, and that's it. It's like geocities for
NFTs; pretty much the next best thing to self-hosted.
But to mint an NFT you don't even need to do that, you just hit the "Mint NFTs"
button. So I did that, I uploaded an image, I paid the hosting fee ($2), and
that was it!
You can view my first NFT [here][ghost]! It's not for sale.
I'm hoping that one day I can get back to Metaplex and get it working, I'd much
prefer to have my store hosted myself. But at least this NFT exists now, and I
have a mechanism to make other ones for other people.
[prev]: {% post_url 2021-10-31-dog-money %}
[meta]: https://www.metaplex.com/
[hola]: https://holaplex.com/
[ghost]: https://solscan.io/token/HsFpMvY9j5uy68CSDxRvb5aeoj4L3D4vsAkHsFqKvDYb

View File

@@ -1,422 +0,0 @@
---
title: >-
Ginger: It's Alive!
description: >-
The new best language for computing fibonacci numbers.
series: ginger
tags: tech
---
As a kind of Christmas present to myself I took a whole week off of work
specifically to dedicate myself to working on ginger.
My concrete goal was to be able to run a ginger program to compute any Nth
fibonacci number, a goal I chose because it would require the implementation of
conditionals, some kind of looping or recursion, and basic addition/subtraction.
In other words, it would require all the elements which comprise a Turing
complete language.
And you know what? I actually succeeded!
The implementation can be found [here][impl]. At this point ginger is an
interpreted language running in a golang-based VM. The dream is for it to be
self-hosted on LLVM (and other platforms after), but as an intermediate step to
that I decided on sticking to what I know (golang) rather than having to learn
two things at once.
In this post I'm going to describe the components of this VM at a high level,
show a quick demo of it working, and finally talk about the roadmap going
forward.
[impl]: https://github.com/mediocregopher/ginger/tree/ebf57591a8ac08da8a312855fc3a6d9c1ee6dcb2
## Graph
The core package of the whole project is the [`graph`][graph] package. This
package implements a generic directed graph datastructure.
The generic part is worth noting; I was able to take advantage of go's new
generics which are currently [in beta][go118]. I'd read quite a bit on how the
generic system would work even before the beta was announced, so I was able to
hit the ground running and start using them without much issue.
Ginger's unique graph datastructure has been discussed in previous posts in this
series quite a bit, and this latest implementation doesn't deviate much at a
high level. Below are the most up-to-date core datatypes and functions which are
used to construct ginger graphs:
```go
// Value is any value which can be stored within a Graph. Values should be
// considered immutable, ie once used with the graph package their internal
// value does not change.
type Value interface {
    Equal(Value) bool
    String() string
}

// OpenEdge consists of the edge value (E) and source vertex value (V) of an
// edge in a Graph. When passed into the AddValueIn method a full edge is
// created. An OpenEdge can also be sourced from a tuple vertex, whose value is
// an ordered set of OpenEdges of this same type.
type OpenEdge[E, V Value] struct { ... }

// ValueOut creates a OpenEdge which, when used to construct a Graph, represents
// an edge (with edgeVal attached to it) coming from the vertex containing val.
func ValueOut[E, V Value](edgeVal E, val V) *OpenEdge[E, V]

// TupleOut creates an OpenEdge which, when used to construct a Graph,
// represents an edge (with edgeVal attached to it) coming from the vertex
// comprised of the given ordered-set of input edges.
func TupleOut[E, V Value](edgeVal E, ins ...*OpenEdge[E, V]) *OpenEdge[E, V]

// Graph is an immutable container of a set of vertices. The Graph keeps track
// of all Values which terminate an OpenEdge. E indicates the type of edge
// values, while V indicates the type of vertex values.
type Graph[E, V Value] struct { ... }

// AddValueIn takes a OpenEdge and connects it to the Value vertex containing
// val, returning the new Graph which reflects that connection.
func (*Graph[E, V]) AddValueIn(val V, oe *OpenEdge[E, V]) *Graph[E, V]

// ValueIns returns, if any, all OpenEdges which lead to the given Value in the
// Graph (ie, all those added via AddValueIn).
func (*Graph[E, V]) ValueIns(val Value) []*OpenEdge[E, V]
```
The current `Graph` implementation is _incredibly_ inefficient: it does a lot of
copying, looping, and equality checks which could be optimized out one day.
That's going to be a recurring theme of this post, as I had to perform a
balancing act between actually reaching my goal for the week while not incurring
too much tech debt for myself.
[graph]: https://github.com/mediocregopher/ginger/blob/ebf57591a8ac08da8a312855fc3a6d9c1ee6dcb2/graph/graph.go
[go118]: https://go.dev/blog/go1.18beta1
### MapReduce
There's a final operation I implemented as part of the `graph` package:
[MapReduce][mapreduce]. It's a difficult operation to describe, but I'm going to
do my best in this section for those who are interested. If you don't understand
it, or don't care, just know that `MapReduce` is a generic tool for transforming
graphs.
For a description of `MapReduce` we need to present an example graph:
```
        +<--b---
        +       \
X <--a--+<--c----+<--f-- A
        +               /
        +      +<---g---
        +<--d--+
               +<---h---
                        \
Y <---------e----------- B
```
Plus signs indicate tuples, and lowercase letters are edge values while upper
case letters are vertex values. The pseudo-code to construct this graph in go
might look like:
```go
g := new(Graph)

fA := ValueOut("f", "A")

g = g.AddValueIn(
    "X",
    TupleOut(
        "a",
        TupleOut("b", fA),
        TupleOut("c", fA),
        TupleOut(
            "d",
            ValueOut("g", "A"),
            ValueOut("h", "B"),
        ),
    ),
)

g = g.AddValueIn("Y", ValueOut("e", "B"))
```
As can be seen in the [code][mapreduce], `MapReduce`'s first argument is an
`OpenEdge`, _not_ a `Graph`. Fundamentally `MapReduce` is a reduction of the
_dependencies_ of a particular value into a new value; to reduce the
dependencies of multiple values at the same time would be equivalent to looping
over those values and calling `MapReduce` on each individually. Having
`MapReduce` only deal with one edge at a time is more flexible.
So let's focus on a particular `OpenEdge`, the one leading into `X` (returned by
`TupleOut("a", etc...)`). `MapReduce` is going to descend into this `OpenEdge`
recursively, in order to first find all value vertices (ie the leaf vertices,
those without any children of their own).
At this point `MapReduce` will use its second argument, the `mapVal` function,
which accepts a value of one type and returns a value of another type. This
function is called on each value from every value vertex encountered. In this
case both `A` and `B` are connectable from `X`, so `mapVal` will be called on
each _only once_. This is the case even though `A` is connected multiple
times (once with an edge value of `f`, another with an edge value of `g`).
`mapVal` only gets called once per vertex, not per connection.
With all values mapped, `MapReduce` will begin reducing. For each edge leaving
each value vertex, the `reduceEdge` function is called. `reduceEdge` accepts as
arguments the edge value of the edge and the _mapped value_ (not the original
value) of the vertex, and returns a new value of the same type that `mapVal`
returned. Like `mapVal`, `reduceEdge` will only be called once per edge. In our
example, `<--f--A` is used twice (`b` and `c`), but `reduceEdge` will only be
called on it once.
With each value vertex edge having been reduced, `reduceEdge` is called again on
each edge leaving _those_ edges, which must be tuple edges. An array of the
values returned from the previous `reduceEdge` calls for each of the tuples'
input edges is used as the value argument in the next call. This is done until
the `OpenEdge` is fully reduced into a single value.
To flesh out our example, let's imagine a `mapVal` which returns the input
string repeated twice, and a `reduceEdge` which returns the input values joined
with the edge value, and then wrapped with the edge value (eg `reduceEdge(a, [B,
C]) -> aBaCa`).
Calling `MapReduce` on the edge leading into `X` will then give us the following
calls:
```
# Map the value vertices
mapVal(A) -> AA
mapVal(B) -> BB
# Reduce the value vertex edges
reduceEdge(f, [AA]) -> fAAf
reduceEdge(g, [AA]) -> gAAg
reduceEdge(h, [BB]) -> hBBh
# Reduce tuple vertex edges
reduceEdge(b, [fAAf]) -> bfAAfb
reduceEdge(c, [fAAf]) -> cfAAfc
reduceEdge(d, [gAAg, hBBh]) -> dgAAgdhBBhd
reduceEdge(a, [bfAAfb, cfAAfc, dgAAgdhBBhd]) -> abfAAfbacfAAfcadgAAgdhBBhda
```
Beautiful, exactly what we wanted.
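To make that concrete, here's a small, self-contained Go sketch of the `mapVal`
and `reduceEdge` callbacks used in this example, with the final reduction
reproduced by hand. The real `MapReduce` drives the traversal itself and caches
each edge's result, so treat the wiring in `main` as illustrative only:
```go
package main

import (
    "fmt"
    "strings"
)

// mapVal repeats the input value twice, eg "A" -> "AA".
func mapVal(val string) string {
    return val + val
}

// reduceEdge joins the already-reduced input values with the edge value and
// wraps the result in it, eg reduceEdge("a", []string{"B", "C"}) -> "aBaCa".
func reduceEdge(edgeVal string, ins []string) string {
    return edgeVal + strings.Join(ins, edgeVal) + edgeVal
}

func main() {
    // Reproduce the reduction of the edge leading into X by hand. Shared work
    // (like the f edge) is recomputed here, whereas MapReduce would only
    // compute it once.
    fAAf := reduceEdge("f", []string{mapVal("A")})

    b := reduceEdge("b", []string{fAAf})
    c := reduceEdge("c", []string{fAAf})
    d := reduceEdge("d", []string{
        reduceEdge("g", []string{mapVal("A")}),
        reduceEdge("h", []string{mapVal("B")}),
    })

    fmt.Println(reduceEdge("a", []string{b, c, d}))
    // Output: abfAAfbacfAAfcadgAAgdhBBhda
}
```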
`MapReduce` will prove extremely useful when it comes time for the VM to execute
the graph. It enables the VM to evaluate only the values which are needed to
produce an output, and to only evaluate each value once no matter how many times
it's used. `MapReduce` also takes care of the recursive traversal of the
`Graph`, which simplifies the VM code significantly.
[mapreduce]: https://github.com/mediocregopher/ginger/blob/ebf57591a8ac08da8a312855fc3a6d9c1ee6dcb2/graph/graph.go#L338
## gg
With a generic graph implementation out of the way, it was then required to
define a specific implementation which could be parsed from a file and later
used for execution in the VM.
The file extension used for ginger code is `.gg`, as in "ginger graph" (of
course). The package name for decoding this file format is, therefore, also
called `gg`.
The core datatype for the `gg` package is the [`Value`][ggvalue], since the
`graph` package takes care of essentially everything else in the realm of graph
construction and manipulation. The type definition is:
```go
// Value represents a value which can be serialized by the gg text format.
type Value struct {
    // Only one of these fields may be set
    Name   *string
    Number *int64
    Graph  *Graph

    // Optional fields indicating the token which was used to construct this
    // Value, if any.
    LexerToken *LexerToken
}

type Graph = graph.Graph[Value, Value] // type alias for convenience
```
Note that it's currently only possible to describe three different types in a
`gg` file, and one of them is the `Graph`! These are the only ones needed to
implement a fibonacci function, so they're all I implemented.
The lexing/parsing of `gg` files is not super interesting; you can check out the
package code for more details. The only other thing worth noting is that, for
now, all statements are required to end with a `;`. I had originally wanted to
be less strict with this, and allow newlines and other tokens to indicate the
end of statements, but it was complicating the code and I wanted to move on.
Another small thing worth noting is that I decided to make each entire `.gg`
file implicitly define a graph. So you can imagine each file's contents wrapped
in curly braces.
With the `gg` package out of the way I was able to finally parse ginger
programs! The following is the actual, real-life implementation of the fibonacci
function (though at this point it didn't actually work, because the VM was still
not implemented):
```
out = {
    decr = { out = add < (in; -1;); };
    n = tupEl < (in; 0;);
    a = tupEl < (in; 1;);
    b = tupEl < (in; 2;);
    out = if < (
        isZero < n;
        a;
        recur < (
            decr < n;
            b;
            add < (a;b;);
        );
    );
} < (in; 0; 1;);
```
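For readers who don't parse `.gg` syntax yet, here's my hand-translation of what
that program computes into plain Go (my own reading of it, not anything
generated by the toolchain):
```go
package main

import "fmt"

// fib mirrors the gg program above: n counts down from the input while a and b
// walk the fibonacci sequence, starting from the (0; 1;) seed values that the
// outer graph is called with.
func fib(in int) int {
    n, a, b := in, 0, 1
    for n != 0 {
        n, a, b = n-1, b, a+b
    }
    return a
}

func main() {
    fmt.Println(fib(8)) // 21, matching the demo output further down
}
```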
[ggvalue]: https://github.com/mediocregopher/ginger/blob/ebf57591a8ac08da8a312855fc3a6d9c1ee6dcb2/gg/gg.go#L14
## VM
Finally, the meat of all this. If the `graph` and `gg` packages are the sturdy,
well constructed foundations of a tall building, then the `vm` package is the
extremely long, flimsy stick someone propped up vertically so they could say
they built a structure of impressive height.
In other words, it's very likely that the current iteration of the VM will not
be long for this world, and so I won't waste time describing it in super detail.
What I will say about it is that within the `vm` package I've defined a [new
`Value` type][vmvalue], which extends the one defined in `gg`. The necessity of
this was that there are types which cannot be represented syntactically in a
`.gg` file, but which _can_ be used as values within a program being run.
The first of these is the `Operation`, which is essentially a first-class
function. The VM will automatically interpret a graph as an `Operation` when it
is used as an edge value, as has been discussed in previous posts, but there are
also built-in operations (like `if` and `recur`) which cannot be represented as
datastructures, and so it was necessary to introduce a new in-memory type to
properly represent operations.
The second is the `Tuple` type. This may seem strange, as ginger graphs already
have a concept of a tuple. But the ginger graph tuple is a _vertex type_, not a
value type. The distinction is small, but important. Essentially the graph tuple
is a structural element which describes how to create a tuple value, but it is
not yet that value. So we need a new Value type to hold the tuple once it _has_
been created during runtime.
Another thing worth describing about the `vm` package, even though I think they
might change drastically, are [`Thunk`s][thunk]:
```go
// Thunk is returned from the performance of an Operation. When called it will
// return the result of that Operation having been called with the particular
// arguments which were passed in.
type Thunk func() (Value, error)
```
The term "thunk" is borrowed from Haskell, which I don't actually know so I'm
probably using it wrong, but anyway...
A thunk is essentially a value which has yet to be evaluated; the VM knows
exactly _how_ to evaluate it, but it hasn't done so yet. The primary reason for
their existence within ginger is to account for conditionals, ie the `if`
operation. The VM can't evaluate each of an `if`'s arguments all at once; it
must only evaluate the first argument (to obtain a boolean), and then based on
that evaluate the second or third argument.
This is where `graph.MapReduce` comes in. The VM uses `graph.MapReduce` to
reduce each edge in a graph to a `Thunk`, where the `Thunk`'s value is based on
the operation (the edge's value) and the inputs to the edge (which will
themselves be `Thunk`s). Because each `Thunk` represents a potential value, not
an actual one, the VM is able to completely parse the program to be executed
(using `graph.MapReduce`) while allowing conditionals to still work correctly.
[EvaluateEdge][evaledge] is where all that happens, if you're interested, but be
warned that the code is a hot mess right now and it's probably not worth
spending a ton of time understanding it as it will change a lot.
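As a toy illustration of why thunks matter for conditionals, here's a
self-contained Go sketch (not the actual `vm` code; `Value` is just an `int`
here to keep the toy small) showing how an `if` built from `Thunk`s only ever
forces the branch it selects:
```go
package main

import "fmt"

// Value stands in for the vm package's Value type; an int keeps the toy small.
type Value = int

// Thunk mirrors the definition quoted above: a computation deferred until called.
type Thunk func() (Value, error)

// constThunk wraps an already-known Value in a Thunk.
func constThunk(v Value) Thunk {
    return func() (Value, error) { return v, nil }
}

// ifThunk returns a Thunk which, when forced, evaluates the condition and then
// only the branch which was selected; the other branch is never run.
func ifThunk(cond, onTrue, onFalse Thunk) Thunk {
    return func() (Value, error) {
        c, err := cond()
        if err != nil {
            return 0, err
        }
        if c != 0 {
            return onTrue()
        }
        return onFalse()
    }
}

func main() {
    t := ifThunk(
        constThunk(0), // "false"
        func() (Value, error) { panic("never evaluated") },
        constThunk(42),
    )

    v, _ := t()
    fmt.Println(v) // 42
}
```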
A final thing I'll mention is that the `recur` operation is, I think, broken. Or
probably more accurately, the entire VM is broken in a way which prevents
`recur` from working correctly. It _does_ produce the correct output, so I
haven't prioritized debugging it, but for any large number of iterations it
takes a very long time to run.
[vmvalue]: https://github.com/mediocregopher/ginger/blob/ebf57591a8ac08da8a312855fc3a6d9c1ee6dcb2/vm/vm.go#L18
[thunk]: https://github.com/mediocregopher/ginger/blob/ebf57591a8ac08da8a312855fc3a6d9c1ee6dcb2/vm/op.go#L11
[evaledge]: https://github.com/mediocregopher/ginger/blob/ebf57591a8ac08da8a312855fc3a6d9c1ee6dcb2/vm/scope.go#L29
## Demo
Finally, to show it off! I put together a super stupid `eval` binary which takes
two arguments: a graph to be used as an operation, and a value to be used as an
argument to that operation. It doesn't even read the code from a file; you have
to `cat` it in.
The [README][readme] documents how to run the demo, so if you'd like to do so
then please clone the repo and give it a shot! It should look like this when you
do:
```
# go run ./cmd/eval/main.go "$(cat examples/fib.gg)" 8
21
```
You can put any number you like instead of `8`, but as mentioned, `recur` is
broken so it can take a while for larger numbers.
[readme]: https://github.com/mediocregopher/ginger/blob/ebf57591a8ac08da8a312855fc3a6d9c1ee6dcb2/README.md
## Next Steps
The following are all the things I'd like to address the next time I work on
ginger:
* `gg`
  * Allow for newlines (and `)` and `}`) to terminate statements, not just
    `;`.
  * Allow names to have punctuation characters in them (maybe?).
  * Don't read all tokens into memory prior to parsing.
* `vm`
  * Fix `recur`.
  * Implement tail call optimization.
* General
  * A bit of polish on the `eval` tool.
  * Expose graph creation, traversal, and transformation functions as
    builtins.
  * Create plan (if not actually implement it yet) for how code will be
    imported from one file to another. Namespacing in general will fall into
    this bucket.
  * Create plan (if not actually implement it yet) for how users can
    extend/replace the lexer/parser.
I don't know _when_ I'll get to work on these next, ginger will come back up in
my rotation of projects eventually. It could be a few months. In the meantime I
hope you're as excited about this progress as I am, and if you have any feedback
I'd love to hear it.
Thanks for reading!

View File

@ -1,155 +0,0 @@
---
title: >-
DAV is All You Need
description: >-
Contacts, calendars, passwords, oh my!
tags: tech
---
For some time now I've been trying to find an alternative solution to Google
Keep for shared note taking. The motivation for this change was two-fold:
* Google sucks, and I'm trying to get their products out of my life in favor of
self-hosted options.
* Google Keep _really_ sucks. Seriously, it can barely load on my Chromebook
because of whatever bloated ass web framework they're using for it. It's just
a note taking app!
So this weekend I buckled down and actually made the switch. The first step was
to find something to switch _to_, however, which ended up being not trivial.
There's a million different options in this space, but surprisingly few which
could fulfill the exact niche we need in our household:
* Fully open-source and open protocol. If it's not open it's not worth the
bother of switching, cause we'll just have to do it all again once whatever
product we switch to gets acqui-hired by a food delivery app.
* Self-hosted using a _simple_ server-side component. I'm talking something that
listens on a public port and saves data to a file on disk, and _that's it_.
No database processes, no message queues, no bullshit. We're not serving a
million users here, there's no reason to broaden the attack surface
unnecessarily.
* Multi-platform support, including mobile. Our primary use-case here is our
grocery list, which needs to be accessible by everyone everywhere.
I've already got a Nextcloud instance running at home, and there is certainly a
notes extension for it, so that could have been an option here. But Nextcloud
very much does not fall into the second point above: it's not simple. It's a
giant PHP app that uses Postgres as a backend, has its own authentication and
session system, and has a plugin system. Frankly, it was easily the biggest
security hole on the entire server, and I wasn't eager to add usage to it.
Happily, I found another solution.
## WebDAV
There's a project called [Joplin](https://joplinapp.org/) which implements a
markdown-based notes system with clients for Android, iPhone, Linux, Mac, and
Windows. Somewhat interestingly there is _not_ a web client for it, but on
further reflection I don't think that's a big deal... no bloated javascript
frameworks to worry about at least.
In addition to their own cloud backend, Joplin supports a number of others, with
the most interesting being WebDAV. WebDAV is an XML-based extension to HTTP
which allows for basic write operations on the server-side, and which uses
HTTP's basic auth for authentication. You can interact with it using curl if you
like; it really can't get simpler.
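For instance, uploading a note is nothing more than an authenticated HTTP
`PUT`. Here's a minimal Go sketch of that, using the hypothetical
`hostname.com` endpoint and shared credentials from the Caddy config shown next
(curl's `-T` and `--user` flags would do the same job):
```go
package main

import (
    "fmt"
    "net/http"
    "strings"
)

func main() {
    // PUT a small markdown file into the WebDAV root. Hostname, path, and
    // credentials are placeholders matching the example Caddyfile below.
    req, err := http.NewRequest(
        http.MethodPut,
        "https://hostname.com/hello.md",
        strings.NewReader("# hello\n"),
    )
    if err != nil {
        panic(err)
    }
    req.SetBasicAuth("sharedUser", "sharedPassword")

    resp, err := http.DefaultClient.Do(req)
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    fmt.Println(resp.Status) // expect something like "201 Created"
}
```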
[Caddy](https://caddyserver.com/) is the server I use to handle all incoming
HTTP requests to my server, and luckily there's a semi-official
[WebDAV](https://github.com/mholt/caddy-webdav) plugin which adds WebDAV
support. With that compiled in, the `Caddyfile` configuration is nothing more
than:
```
hostname.com {
    route {
        basicauth {
            sharedUser sharedPassword
        }
        webdav {
            root /data/webdav
        }
    }
}
```
With that in place, any Joplin client can be pointed at `hostname.com` using the
shared username/password, and all data is stored directly to `/data/webdav` by
Caddy. Easy-peasy.
## CardDAV/CalDAV
Where WebDAV is an extension of HTTP to allow for remotely modifying files
generally, CardDAV and CalDAV are extensions of WebDAV for managing remote
stores of contacts and calendar events, respectively. At least, that's my
understanding.
Nextcloud has its own Web/Card/CalDAV service, and that's what I had been, up
till this point, using for syncing my contacts and calendar from my phone. But
now that I was setting up a separate WebDAV endpoint, I figured it'd be worth
setting up a separate Card/CalDAV service and get that much closer to getting
off Nextcloud entirely.
There is, as far as I know, no Card or CalDAV extension for Caddy, so I'd still
need a new service running. I came across
[radicale](https://radicale.org/v3.html), which fits the bill nicely. It's a
simple CalDAV and CardDAV server which saves directly to disk, much like the
Caddy WebDAV plugin. With that running, I needed only to add the following to my
`Caddyfile`, above the `webdav` directive:
```
handle /radicale/* {
    uri strip_prefix /radicale
    reverse_proxy 127.0.0.1:5454 {
        header_up X-Script-Name /radicale
    }
}
```
Now I could point the [DAVx5](https://www.davx5.com/) app on my phone to
`hostname.com/radicale` and boom, contact and calendar syncing was within reach.
I _did_ have a lot of problems getting DAVx5 working properly, but those were
more to do with Android than self-hosting, and I eventually worked through them.
## Passwords
At this point I considered that the only thing I was still really using
Nextcloud for was password management, a la Lastpass or 1Password. I have a lot
of gripes with Nextcloud's password manager, in addition to my aforementioned
gripes with Nextcloud generally, so I thought it was worth seeing if some DAV or
another could be the final nail in Nextcloud's coffin.
A bit of searching around led me to [Tusk](https://subdavis.com/Tusk/), a chrome
extension which allows the chrome browser to fetch a
[KeePassXC](https://keepassxc.org/) database from a WebDAV server, decode it,
and autofill it into a website. Basically perfect. I had only to export my
passwords from Nextcloud as a CSV, import them into a fresh KDBX file using the
KeePassXC GUI, place the file in my WebDAV folder, and point Tusk at that.
I found the whole experience of using Tusk to be extremely pleasant. Everything
is very well labeled and described, and there's appropriate warnings and such in
places where someone might commit a security crime (e.g. using the same password
for WebDAV and their KDBX file).
My one gripe is that it seems to be very slow to unlock the file in practice. I
don't _think_ this has to do with my server, as Joplin is quite responsive, so
it could instead have to do with my KDBX file's decryption difficulty setting.
Perhaps Tusk is doing the decryption in userspace javascript... I'll have to
play with it some.
But it's a small price to be able to turn off Nextcloud completely, which I have
now done. I can sleep easier at night now, knowing there's not some PHP
equivalent to Log4j which is going to bite me in the ass one day while I'm on
vacation.

View File

@ -1,173 +0,0 @@
---
title: >-
Ginger Names
description: >-
Thoughts about a fundamental data type.
tags: tech
series: ginger
---
The ginger language has, so far, 2 core types implemented: numbers and names.
Obviously there will be more coming later, but at this stage of development
these are all that's really needed. Numbers are pretty self explanatory, but
it's worth talking about names a bit.
As they are currently defined, a name's only property is that it can either be
equal or not equal to another name. Syntactically they are encoded as being any
alphanumeric token starting with an alphabetic character. We might _think_ of
them as being strings, but names lack nearly all capabilities that strings have:
they cannot be iterated over, they cannot be concatenated, they cannot be split.
Names can only be compared for equality.
## Utility
The use-case for names is self-explanatory: they are words which identify
something from amongst a group.
Consider your own name. It _might_ have an ostensible meaning. Mine, Brian,
means "high" (as in... like a hill, which is the possible root word). But when
people yell "Brian" across the room I'm in, they don't mean a hill. They mean
me, because that word is used to identify me from amongst others. The etymology
is essentially background information which doesn't matter.
We use names all the time in programming, though we don't always call them that.
Variable names, package names, type names, function names, struct field names.
There's also keys which get used in hash maps, which are essentially names, as
well as enumerations. By defining name as a core type we can cover a lot of
ground.
## Precedence
This is not the first time a name has been used as a core type. Ruby has
symbols, which look like `:this`. Clojure has keywords, which also look like
`:this`, and it has symbols, which look like `this`. Erlang has atoms, which
don't have a prefix and so look like `this`. I can't imagine these are the only
examples. They are all called different things, but they're all essentially the
same thing: a runtime value which can only be compared for equality.
I can't speak much about ruby, but I _can_ speak about clojure and erlang.
Clojure is a LISP language, meaning the language itself is described using the
data types and structures built into the language. Ginger is also a LISP, though
it uses graphs instead of lists.
Clojure keywords are generally used as keys to hash maps, sentinel values, and
enumerations. Besides keywords, clojure also makes use of symbols, which are
used for variable and library names. There seems to be some kind of split
ability on symbols, as they are expected to be separated on their periods when
importing, as in `clojure.java.io`. There's also a quoting mechanism in clojure,
where prefixing a symbol, or other value, with a single quote, like `'this`,
prevents it from being evaluated as a variable or function call.
It's also possible to have something get quoted multiple layers deep, like
`'''this`. This can get confusing.
Erlang is not a LISP language, but it does have atoms. These values are used in
the same way that clojure keywords are used. There is no need for a
corresponding symbol type like clojure has, since erlang is not a LISP and has
no real macro system. Atoms are sort of used like symbols, in that functions and
packages are identified by an atom, and so one can "call" an atom, like
`this()`, in order to evaluate it.
## Just Names
I don't really see the need for clojure's separation between keywords and
symbols. Symbols still need to be quoted in order to prevent evaluation either
way, so you end up with three different entities to juggle (keywords, symbols,
and symbols which won't be evaluated). Erlang's solution is simpler, atoms are
just atoms, and since evaluation is explicit there's no need for quoting. Ginger
names are like erlang atoms in that they are the only tool at hand.
The approaches of erlang vs clojure could be reframed as explicit vs implicit
evaluation of operation calls.
In ginger evaluation is currently done implicitly, but in only two cases:
* A value on an edge is evaluated to the first value which is a graph (which
then gets interpreted as an operation).
* A leaf vertex with a name value is evaluated to the first value which is not a
name.
In all other cases, the value is left as-is. A graph does not need to be quoted,
since the need to evaluate a graph as an operation is already based on its
placement as an edge or not. So the only case left where quoting is needed (if
implicit evaluation continues to be used) is a name on a leaf vertex, as in the
example before.
As an example to explore explicit vs implicit quoting in ginger, if we want to
programmatically call the `AddValueIn` method on a graph, which terminates an
open edge into a value, and that value is a name, it might look like this with
implicit evaluation (the clojure-like example):
```
out = addValueIn < (g (quote < someName;) someValue; );
* or, to borrow the clojure syntax, where single quote is a shortcut:
out = addValueIn < (g; 'someName; someValue; );
```
In an explicit evaluation language, which ginger so far has not been and so this
will look weird, we might end up with something like this:
```
out = addValueIn < (eval < g; someName; eval < someValue; );
* with $ as sugar for the `eval`, like ' is a shortcut for `quote` in clojure:
out = addValueIn < ($g; someName; $someValue; );
```
I don't _like_ either pattern, and since it's such a specific case I feel like
something less obtrusive could come up. So no decisions here yet.
## Uniqueness
There's another idea I haven't really gotten to the bottom of yet. The idea is
that a name, _maybe_, shouldn't be considered equal to the same name unless they
belong to the same graph.
For example:
```
otherFoo = { out = 'foo } < ();
out = equal < ('foo; otherFoo; );
```
This would output false. `otherFoo`'s value is the name `foo`, and the value
it's being compared to is also a name `foo`, but they are from different graphs
and so are not equal. In essence, names are automatically namespaces.
This idea only really makes sense in the context of packages, where a user
(a developer) wants to import functionality from somewhere else and use it
in their program. The code package which is imported will likely use name
values internally to implement its functionality, but it shouldn't need to worry
about naming conflicts with values passed in by the user. While it's possible to
avoid conflicts if a package is designed conscientiously, it's also easy to mess
up if one isn't careful. This becomes especially true when combining
functionality of packages with overlapping functionality, where the data
returned from one might look _similar_ to that used by the other but isn't
necessarily the same.
On the other hand, this could create some real headaches for the developer, as
they chase down errors which are caused because one `foo` isn't actually the
same as another `foo`.
What it really comes down to is the mechanism which packages use to function as
packages. Forced namespaces will require packages to export all names which they
expect the user to need to work with the package. So the ergonomics of that
exporting, both on the user's and package's side, are really important in order
to make this bearable.
So it's hard to make any progress on determining if this idea is gonna work
until the details of packaging are worked out. But for this idea to work the
packaging is going to need to be designed with it in mind. It's a bit of a
puzzle, and one that I'm going to marinate on longer, in addition to the quoting
of names.
And that's names, their current behavior and possible future behavior. Keep an
eye out for more ginger posts in.... many months, because I'm going to go work
on other things for a while (I say, with a post from a month ago having ended
with the same sentiment).

View File

@ -1,258 +0,0 @@
---
title: >-
The Cryptic Filesystem
description: >-
Hey, I'm brainstorming here!
series: nebula
tags: tech
---
Presently the cryptic-net project has two components: a VPN layer (implemented
using [nebula][nebula]), and a DNS component which makes communicating across that
VPN a bit nicer. All of this is wrapped up in a nice bow using an AppImage and a
simple process manager. The foundation is laid for adding the next major
component: a filesystem layer.
I've done a lot of research and talking about this layer, and you can see past
posts in this series talking about it. Unfortunately, I haven't really made much
progress on a solution. It really feels like there's nothing out there already
implemented, and we're going to have to do it from scratch.
To briefly recap the general requirements of the cryptic network filesystem
(cryptic-fs), it must have:
* Sharding of the fs dataset, so each node doesn't need to persist the full
dataset.
* Replication factor (RF), so each piece of content must be persisted by at
least N nodes of the clusters.
* Nodes are expected to be semi-permanent. They are expected to be in it for the
long-haul, but they also may flit in and out of existence frequently.
* Each cryptic-fs process should be able to track multiple independent
filesystems, with each node in the cluster not necessarily tracking the same
set of filesystems as the others.
This post is going to be a very high-level design document for what, in my head,
is the ideal implementation of cryptic-fs. _If_ cryptic-fs is ever actually
implemented it will very likely differ from this document in major ways, but one
must start somewhere.
[nebula]: https://github.com/slackhq/nebula
## Merkle DAG
It wouldn't be a modern network filesystem project if there wasn't a [Merkle
DAG][mdag]. The minutia of how a Merkle DAG works isn't super important here,
the important bits are:
* Each file is represented by a content identifier (CID), which is essentially a
consistent hash of the file's contents.
* Each directory is also represented by a CID which is generated by hashing the
CIDs of the directory's files and their metadata.
* Since the root of the filesystem is itself a directory, the entire filesystem
can be represented by a single CID. By tracking the changing root CID all
hosts participating in the network filesystem can cheaply identify the latest
state of the entire filesystem.
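To make that last point a bit more tangible, here's a rough, self-contained Go
sketch of how a directory's CID could be derived from its entries. Real systems
like IPFS use multihashes and proper node encodings; this is only meant to show
why changing any file changes the root CID (the CID strings in `main` are
obviously fake placeholders):
```go
package main

import (
    "crypto/sha256"
    "encoding/hex"
    "fmt"
    "sort"
)

// dirCID hashes a directory's entries (name -> child CID) into the directory's
// own CID. Any change to a child's contents changes its CID, which changes
// this hash, which bubbles all the way up to the root.
func dirCID(entries map[string]string) string {
    names := make([]string, 0, len(entries))
    for name := range entries {
        names = append(names, name)
    }
    sort.Strings(names) // hash in a deterministic order

    h := sha256.New()
    for _, name := range names {
        fmt.Fprintf(h, "%s=%s\n", name, entries[name])
    }
    return hex.EncodeToString(h.Sum(nil))
}

func main() {
    root := dirCID(map[string]string{
        "photos": dirCID(map[string]string{"cat.jpg": "fake-cid-of-cat-jpg"}),
        "notes":  dirCID(map[string]string{"todo.md": "fake-cid-of-todo-md"}),
    })
    fmt.Println(root)
}
```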
A storage system for a Merkle DAG is implemented as a key-value store which maps
CID to directory node or file contents. When nodes in the cluster communicate
about data in the filesystem they will do so using these CIDs; one node might
ask the other "can you give me CID `AAA`", and the other would respond with the
contents of `AAA` without really caring about whether or not that CID points to
a file or directory node or whatever. It's quite a simple system.
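As a sketch of that key-value surface (again, just an assumption about its
shape, not an actual design from the project):
```go
// CID is a stand-in alias; a real implementation would use a proper
// multihash-based type.
type CID = string

// BlockStore is roughly the interface each node's local storage would expose.
type BlockStore interface {
    // Get returns the raw contents (file bytes or a serialized directory
    // node) stored under the given CID, if present locally.
    Get(cid CID) ([]byte, error)

    // Put stores contents and returns the CID derived from hashing them, so
    // identical contents always land under the same key.
    Put(contents []byte) (CID, error)

    // Has reports whether the CID is persisted locally, which is what a node
    // would consult before answering a WHO_HAS message (described below).
    Has(cid CID) (bool, error)
}
```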
As far as actual implementation of the storage component, it's very likely we
could re-use some part of the IPFS code-base rather than implementing this from
scratch.
[mdag]: https://docs.ipfs.io/concepts/merkle-dag/
## Consensus
The cluster of nodes needs to (roughly) agree on some things in order to
function:
* What the current root CID of the filesystem is.
* Which nodes have which CIDs persisted.
These are all things which can change rapidly, and which _every_ node in the
cluster will need to stay up-to-date on. On the other hand, given efficient use
of the boolean tagged CIDs mentioned in the previous section, this is a dataset
which could easily fit in memory even for large filesystems.
I've done a bunch of research here and I'm having trouble finding anything
existing which fits the bill. Most databases expect the set of nodes to be
pretty constant, so that eliminates most of them. Here's a couple of other ideas
I spitballed:
* Taking advantage of the already written [go-ds-crdt][crdt] package which the
[IPFS Cluster][ipfscluster] project uses. My biggest concern with this
project, however, is that the entire history of the CRDT must be stored on
each node, which in our use-case could be a very long history.
* Just saying fuck it and using a giant redis replica-set, where each node in
the cluster is a replica and one node is chosen to be the primary. [Redis
sentinel][sentinel] could be used to decide the current primary. The issue is
that I don't think sentinel is designed to handle hundreds or thousands of
nodes, which places a ceiling on cluster capacity. I'm also not confident that
the primary node could handle hundreds/thousands of replicas syncing from it
nicely; that's not something Redis likes to do.
* Using a blockchain engine like [Tendermint][tendermint] to implement a custom,
private blockchain for the cluster. This could work performance-wise, but I
think it would suffer from the same issue as CRDT.
It seems to me like some kind of WAN-optimized gossip protocol would be the
solution here. Each node already knows which CIDs it itself has persisted, so
what's left is for all nodes to agree on the latest root CID, and to coordinate
who is going to store what long-term.
[crdt]: https://github.com/ipfs/go-ds-crdt
[ipfscluster]: https://cluster.ipfs.io/
[sentinel]: https://redis.io/topics/sentinel
[tendermint]: https://tendermint.com/
### Gossip
The [gossipsub][gossipsub] library which is built into libp2p seems like a good
starting place. It's optimized for WANs and, crucially, is already implemented.
Gossipsub makes use of different topics, onto which peers in the cluster can
publish messages which other peers who are subscribed to those topics will
receive. It makes sense to have a topic-per-filesystem (remember, from the
original requirements, that there can be multiple filesystems being tracked), so
that each node in the cluster can choose for itself which filesystems it cares
to track.
The messages which can get published will be dependent on the different
situations in which nodes will want to communicate, so it's worth enumerating
those.
**Situation #1: Node A wants to obtain a CID**: Node A will send out a
`WHO_HAS:<CID>` message (not the actual syntax) to the topic. Node B (and
possibly others), which has the CID persisted, will respond with `I_HAVE:<CID>`.
The response will be sent directly from B to A, not broadcast over the topic,
since only A cares. The timing of B's response to A could be subject to a delay
based on B's current load, such that another less loaded node might get its
response in first.
From here node A would initiate a download of the CID from B via a direct
connection. If node A has enough space then it will persist the contents of the
CID for the future.
This situation could arise because the user has opened a file in the filesystem
for reading, or has attempted to enumerate the contents of a directory, and the
local storage doesn't already contain that CID.
**Situation #2: Node A wants to delete a CID which it has persisted**: Similar
to #1, Node A needs to first ensure that other nodes have the CID persisted, in
order to maintain the RF across the filesystem. So node A first sends out a
`WHO_HAS:<CID>` message. If >=RF nodes respond with `I_HAVE:<CID>` then node A
can delete the CID from its storage without concern. Otherwise it should not
delete the CID.
**Situation #2a: Node A wants to delete a CID which it has persisted, and which
is not part of the current filesystem**: If the filesystem is in a state where
the CID in question is no longer present in the system, then node A doesn't need
to care about the RF and therefore doesn't need to send any messages.
**Situation #3: Node A wants to update the filesystem root CID**: This is as
simple as sending out a `ROOT:<CID>` message on the topic. Other nodes will
receive this and note the new root.
**Situation #4: Node A wants to know the current filesystem root CID**: Node A
sends out a `ROOT?` message. Other nodes will respond to node A directly telling
it the current root CID.
These describe the circumstances around the messages used across the gossip
protocol in a very shallow way. In order to properly flesh out the behavior of
the consistency mechanism we need to dive in a bit more.
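Before that, here's a speculative sketch of how those messages might be modeled
in Go; none of this is implemented, and the names are only the placeholder
syntax used above:
```go
// CID is again just a string stand-in for a real content identifier type.
type CID = string

// MsgType enumerates the gossip messages sketched in the situations above.
type MsgType string

const (
    MsgWhoHas  MsgType = "WHO_HAS" // broadcast: who has this CID persisted?
    MsgIHave   MsgType = "I_HAVE"  // direct reply: I have this CID persisted
    MsgRoot    MsgType = "ROOT"    // broadcast: the filesystem root is now this CID
    MsgRootAsk MsgType = "ROOT?"   // broadcast: what is the current root CID?
)

// Msg is what would actually get published on a filesystem's gossipsub topic
// (or sent directly back to the asking peer, for the reply cases).
type Msg struct {
    Type MsgType
    CID  CID    // unset for ROOT? messages
    From string // peer ID of the sender, so direct replies know where to go
}
```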
### Optimizations, Replication, and GC
A key optimization worth hitting straight away is to declare that each node will
always immediately persist all directory CIDs whenever a `ROOT:<CID>` message is
received. This will _generally_ only involve a couple of round-trips with the
host which issued the `ROOT:<CID>` message, with opportunity for
parallelization.
This could be a problem if the directory structure becomes _huge_, at which
point it might be worth placing some kind of limit on what percent of storage is
allowed for directory nodes. But really... just have fewer directories, people!
The next thing to dive in on is replication. We've already covered in situation
#1 what happens if a user specifically requests a file. But that's not enough
to ensure the RF of the entire filesystem, as some files might not be requested
by any users except the original user to add the file.
We can note that each node knows when a file has been added to the filesystem,
thanks to each node knowing the full directory tree. So upon seeing that a new
file has been added, a node can issue a `WHO_HAS:<CID>` message for it, and if
less than RF nodes respond then it can persist the CID. This is all assuming
that the node has enough space for the new file.
One wrinkle in that plan is that we don't want all nodes to send the
`WHO_HAS:<CID>` at the same time for the same CID, otherwise they'll all end up
downloading the CID and over-replicating it. A solution here is for each node to
delay its `WHO_HAS:<CID>` based on how much space it has left for storage, so
nodes with more free space are more eager to pull in new files.
Additionally, we want to have nodes periodically check the replication status of
each CID in the filesystem. This is because nodes might pop in and out of
existence randomly, and the cluster needs to account for that. The way this can
work is that each node periodically picks a CID at random and checks the
replication status of it. If the period between checks is calculated as being
based on number of online nodes in the cluster and the number of CIDs which can
be checked, then it can be assured that all CIDs will be checked within a
reasonable amount of time with minimal overhead.
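As a back-of-the-envelope version of that scheduling idea (my own sketch, using
the standard library's `time.Duration`, not anything from the project):
```go
// checkPeriod returns how often a single node should pick a random CID to
// verify, such that numNodes nodes collectively look at all numCIDs roughly
// once per sweepTarget.
func checkPeriod(sweepTarget time.Duration, numNodes, numCIDs int) time.Duration {
    if numCIDs <= numNodes {
        // More nodes than CIDs: one check per node per sweep is already plenty.
        return sweepTarget
    }
    return sweepTarget * time.Duration(numNodes) / time.Duration(numCIDs)
}
```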
This dovetails nicely with garbage collection. Given that nodes can flit in and
out of existence, a node might come back from having been down for a time, and
all CIDs it had persisted would then be over-replicated. So the same process
which is checking for under-replicated files will also be checking for
over-replicated files.
### Limitations
This consistency mechanism has a lot of nice properties: it's eventually
consistent, it nicely handles nodes coming in and out of existence without any
coordination between the nodes, and it _should_ be pretty fast for most cases.
However, it has its downsides.
There's definitely room for inconsistency between each node's view of the
filesystem, especially when it comes to the `ROOT:<CID>` messages. If two nodes
issue `ROOT:<CID>` messages at the same time then it's extremely likely nodes
will have a split view of the filesystem, and there's not a great way to
resolve this until another change is made on another node. This is probably the
weakest point of the whole design.
[gossipsub]: https://github.com/libp2p/specs/tree/master/pubsub/gossipsub
## FUSE
The final piece is the FUSE connector for the filesystem, which is how users
actually interact with each filesystem being tracked by their node. This is
actually the easiest component: if we use an idea borrowed from
[Tahoe-LAFS][tahoe], cryptic-fs can expose an SFTP endpoint and that's it.
The idea is that hooking up an existing SFTP implementation to the rest of
cryptic-fs should be pretty straightforward, and then every OS should already
have some kind of mount-SFTP-as-FUSE mechanism already, either built into it or
as an existing application. Exposing an SFTP endpoint also allows a user to
access the cryptic-fs remotely if they want to.
[tahoe]: https://tahoe-lafs.org/trac/tahoe-lafs
## Ok
So all that said, clearly the hard part is the consistency mechanism. It's not
even fully developed in this document, but it's almost there. The next step,
beyond polishing up the consistency mechanism, is going to be roughly figuring
out all the interfaces and types involved in the implementation, planning out
how those will all interact with each other, and then finally an actual
implementation!

View File

@ -1,221 +0,0 @@
---
title: >-
The Case for Open Infrastructure
description: >-
Without using the word which starts with "W" and ends in "3".
tags: tech crypto
---
Today I saw an image which made me feel deeply sad. This one:
![keybase](/img/open-infra/keybase.png)
In May of 2020 Keybase, one of my all-time favorite web services, was bought by
Zoom, the international collaboration sensation. Almost precisely since that
moment, Keybase has effectively been put on ice. [The final post on the Keybase
Blog][zoom] is refreshingly up-front about this:
[zoom]: https://keybase.io/blog/keybase-joins-zoom
> Initially, our single top priority is helping to make Zoom even more secure. There are no specific plans for the Keybase app yet. Ultimately Keybase's future is in Zoom's hands, and we'll see where that takes us. Of course, if anything changes about Keybases availability, our users will get plenty of notice.
To be fair, there are still a few devs handling Github issues and
committing code. The servers have not been shut down, user data is not lost. The
source code, for both client and server, is still available. The lights are
still on! Nevertheless, I've made every effort to move my data off of Keybase.
This hasn't been done lightly. I was a long-time Keybase user, frequent
advocate, and even friend to some of the team. Keybase has a lot of cool
features built on top of its rock-solid private storage, and I used them to
build the foundation of my digital presence over many years. Clearly, I don't
_want_ to leave Keybase. It feels like saying goodbye to an old friend.
But I've seen this movie before, and I know how it ends. At least doing it now
means I can find replacements for each of Keybase's features at my leisure,
rather than being forced into doing so in a scramble just before the hammer
falls.
## Open-Source is not Enough
The source code for Keybase is, right now, fully open source on
[their Github][kb-src]. Why couldn't someone spin up their own copy for
themselves and their friends, give it a stupid name, and keep the party rolling?
[kb-src]: https://github.com/keybase
The reason is that Keybase wasn't designed to be hosted by anyone except the
Keybase team. There's no documentation on how to run the backend, and only a
9-page long string of Github projects to go on. Even if someone were to figure
it out, the system is probably designed to be a globally scalable service, not a
small clone being hosted for the benefit of a few. The design and expense of two
such systems are vastly different, and not always compatible.
While the Keybase _source code_ may be open, the _infrastructure_ is closed.
Infrastructure refers to the computer hardware which hosts data and runs
processes on behalf of a service's users, as opposed to users doing so
themselves. Users generally have very small resources, think a phone or laptop,
which are not always online. If you want to host 22 terabytes of family photos,
you won't be doing so on your laptop.
You might instead upload them to Keybase, in which case Keybase's servers
will hold onto them for you, and make them available to you whenever you want to
view them. You are now a user of the Keybase service, which is hosted on the
Keybase company's infrastructure.
This arrangement, dubbed a "cloud service", is exceedingly common in the tech
industry, and its weakest point is the company part. Even if the code and the
infrastructure are perfect, and users are completely hooked, and the service
could change the world... if there's no money there's no company.
And yet, even if there is no money, the company must still posture as if there
is going to be, in order to attract investors. These investors then pay for the
infrastructure, in exchange for a cut of any future money.
This posturing excludes anyone who's not downstream of the company from
participating in hosting the infrastructure, as they might then deserve some of
the money too. The investors will likely make more by selling the whole company
to a bigger fish than if they dilute their share of the profits. This is
reflected in the design of the infrastructure itself. It's locked down to anyone
not employed, directly or indirectly, by the company.
In the end the services we rely on rank profitability and exclusivity over
usefulness and endurance, and the internet is worse off for it. We can do
better.
## Open Infrastructure
Open infrastructure is the idea that anyone can help host the infrastructure of
a service they care about, without any barriers to them doing so beyond their
ability to manage the hardware.
The developers of an open infrastructure service don't have to actually manage
the infrastructure themselves, a demanding and time-consuming task, so the
service can be built by volunteers or a small company that isn't directly making
money from the service. And because usefulness now ranks above profitability,
the actual needs of the users of this service can be directly addressed.
None of these ideas around open infrastructure are new, though the wording might
be, and there's already quite a bit of progress made in this direction. To get a
feel for this progress it's worth perusing some of the existing projects and
communities in this space. As we do so I'm going to break the open
infrastructure space up into three notable, though frequently overlapping,
parts: self-hosted, federated, and distributed.
### Self-Hosted
Self-hosted services are those designed to be hosted by individuals or
households for their own personal use. They may be hosted on a home-server,
Raspberry Pi, or other similar device. One installation of a self-hosted service
often never interacts with another, as the user's own infrastructure is enough
to accomplish the service's goal.
Examples of self-hosting use-cases include:
* A personal website or blog (like this one)
* Media library platforms (Jellyfin, Ultrasonic)
* Photo and video sync/storage (Piwigo)
* Document archival and search (Paperless)
* Entire "cloud" replacement suites (NextCloud)
Self-hosting is popular among enthusiasts, but is not popular outside that realm
due to the requirement of managing your own infrastructure.
### Federated
Federated services are designed to be hosted by a small to medium sized entity,
and to serve many more users than are involved in the hosting. The entity may be
a company servicing its employees, members of a community hosting services
for their peers, or a paid service hosting for its customers. One installation
of a federated service will often communicate with another by design, so that
users of the one installation may interact with those on another.
Email is the classic example of a federated service. Each user has an account
with someone running an email server, and they use those servers to exchange
messages with users of other email servers. In the past it wasn't uncommon for
an ISP to host a mail server for its customers, and to this day many companies
still manage email servers for their employees.
The line between federated and self-hosted can sometimes be quite blurry, as it's
frequently possible for an individual to set up a federated service just for
themselves.
Examples of federation use-cases, besides email, include:
* Chat servers (IRC, Matrix)
* Micro-blogs, aka Twitter replacements (Mastodon, Misskey)
* Code hosting (Gitea)
* Social link aggregators, aka Reddit replacements (Lemmy)
* Video streaming platforms (PeerTube)
### Distributed
Where self-hosted services are hosted by-and-for individuals or very small
groups, and federated services are hosted by small groups for larger groups,
distributed services are hosted by both individuals and groups for _everyone_.
The key differentiator between a federated and a distributed service is that in
a federated service the user interacts with a specific server, or set of
servers, that they have a relationship with. In a distributed service the user
has no relationship with those who run the infrastructure, and so users interact
with whoever they can get ahold of.
Examples of distributed service use-cases include:
* Data distribution (IPFS, BitTorrent magnet links)
* Distributed transaction ledgers (Blockchains, love 'em or hate 'em!)
* Onion routing (Tor)
There aren't many examples of truly distributed services that have stood the
test of time, as the protocols and algorithms used to manage these global
datasets are quite tricky to get right. It's possible that the recent advent of
blockchains will change this, though blockchains usually assume some sort of
direct compensation from users to "miners", a barrier not usually found in
federated services.
-----
I don't delineate these categories in order to direct you to one or the other,
or to exhaustively list _every_ ongoing project in this space. Rather, I hope
this gives you a starting point if you're wondering what sorts of problems are
able to be solved using open infrastructure, and how people are approaching
them. Perhaps there's a problem you care about, and there's a way to solve it
using an open infrastructure based solution.
## It's on us
The ideas presented here aren't some pie-in-the-sky, utopian fantasy; this is
happening, and it's happening for good reasons. Some may argue that
infrastructure can't survive without a profit model to pay for it, or that
for-profit services are of higher quality and so free versions will never take
off, or that if services are free to host then no one will make them. People
said much the same about open source software.
The open source movement has shown that participation, not profitability, is the
primary driver of successful software. Why should it not be the case for our
service infrastructure?
-----
**Footnotes**
* I use a lot of words here whose definitions are by no means agreed upon. I've
done my best to stick to my best understanding of word meanings, and to be
consistent in them. If you disagree with my usage of a word, I'm sorry. It
doesn't mean either of us is wrong, we just speak different.
* There is an existing project, under the OpenStack project, called Open
Infrastructure. It has to do with some kind of standardization around the
infrastructure used for cloud services. Unfortunately, the word
"infrastructure" has a much broader meaning than what they are using it for,
and I don't think there's a better word for what I'm trying to describe.

View File

@ -1,222 +0,0 @@
---
title: >-
Ginger: A Small VM Update
description: >-
It works gooder now.
tags: tech
series: ginger
---
During some recent traveling I had to be pulled away from cryptic-net work for a
while. Instead I managed to spend a few free hours, and the odd international
plane ride, fixing the ginger vm.
The problem, as it stood, was that it only functioned "correctly" in a very
accidental sense. I knew from the moment that I published it that it would get
mostly rewritten immediately.
And so here we are, with a rewritten vm and some new realizations.
## Operation
The `Operation` type was previously defined like so:
```
type Operation interface {
    Perform([]Thunk, Operation) (Thunk, error)
}
```
I'm not going to explain it, because it's both confusing and wrong.
One thing that is helpful in a refactor, especially in a strongly typed
language, is to tag certain interfaces as being axiomatic, and conforming the
rest of your changes around those. If those interfaces are simple enough to
apply broadly _and_ accurately describe desired behavior, they will help
pre-decide many difficult decisions you'd otherwise have to make.
So with that mind, I tagged `Operation` as being an axiomatic interface, given
that ginger is aiming to be a functional language (and I'm wondering if I should
just rename `Operation` to `Function`, while I'm at it). The new definition of
the interface is:
```
type Operation interface {
    Perform(Value) Value
}
```
`Operation` takes an argument and returns a result; it could not possibly be
boiled down any further. By holding `Operation` to this definition and making
decisions from there, it was pretty clear what the next point of attack was.
## If/Recur
The reason that `Operation` had previously been defined in such a fucked up way
was to support the `if` and `recur` `Operation`s, as if they weren't different
than any other `Operation`s. But truthfully they are different, as they are
actually control flow constructs, and so require capabilities that no other
`Operation` would be allowed to use anyway.
The new implementation reflects this. `if` and `recur` are now both handled
directly by the compiler, while global `Operation`s like `tupEl` are
implementations of the `Operation` interface.
## Compile Step
The previous iteration of the vm hadn't distinguished between a compile step and
a run step. In a way it did both at the same time, by abusing the `Thunk` type.
Separating the two steps, and ditching the `Thunk` type in the process, was the
next major change in the refactoring.
The compile step can be modeled as a function which takes a `Graph` and returns
an `Operation`, where the `Graph`'s `in` and `out` names correspond to the
`Operation`'s argument and return, respectively. The run step then reads an
input from the user, calls the compiled `Operation` with that input, and outputs
the result back to the user.
As an example, given the following program:
```
* six-or-more.gg
max = {
    a = tupEl < (in, 0)
    b = tupEl < (in, 1)
    out = if < (gt < (a, b), a, b)
}
out = max < (in, 6)
```
we want to compile an `Operation` which accepts a number and returns the greater
of that number and 6. I'm going to use anonymous go functions to demonstrate the
anatomy of the compiled `Operation`, as that's what's happening in the current
compiler anyway.
```
// After compilation, this function will be in-memory and usable as an
// Operation.
sixOrMore := func(in Value) Value {
    max := func(in Value) Value {
        a := tupEl(in, 0)
        b := tupEl(in, 1)
        if a > b {
            return a
        }
        return b
    }
    return max(in, 6)
}
```
Or at least, this is what I tried for _initially_. What I found was that it was
easier, in the context of how `graph.MapReduce` works, to make even the leaf
values, e.g. `in`, `0`, `1`, and `6`, map to `Operations` as well. `in` is
replaced with an anonymous function which returns its argument, and the numbers
are replaced with anonymous functions which ignore their argument and always
return their respective number.
So the compiled `Operation` looks more like this:
```
// After compilation, this function will be in-memory and usable as an
// Operation.
sixOrMore := func(in Value) Value {
    max := func(in Value) Value {
        a := tupEl(
            func(in Value) Value { return in }(in),
            func(_ Value) Value { return 0 }(in),
        )
        b := tupEl(
            func(in Value) Value { return in }(in),
            func(_ Value) Value { return 1 }(in),
        )
        if a > b {
            return a
        }
        return b
    }
    return max(
        func(in Value) Value { return in }(in),
        func(_ Value) Value { return 6 }(in),
    )
}
```
This added layer of indirection for all leaf values is not great for
performance, and there's probably further refactoring which could be done to
make the result look more like the original ideal.
To make things a bit messier, even that representation isn't quite accurate to
the current result. The compiler doesn't properly de-duplicate work when
following name values. In other words, every time `a` is referenced within
`max`, the `Operation` which the compiler produces will recompute `a` via
`tupEl`.
So the _actual_ compiled `Operation` looks more like this:
```
// After compilation, this function will be in-memory and usable as an
// Operation.
sixOrMore := func(in Value) Value {
return func(in Value) Value {
if tupEl(func(in Value) Value { return in }(in), func(_ Value) Value { return 0}(in)) >
tupEl(func(in Value) Value { return in }(in), func(_ Value) Value { return 1}(in)) {
return tupEl(func(in Value) Value { return in }(in), func(_ Value) Value { return 0}(in))
}
return tupEl(func(in Value) Value { return in }(in), func(_ Value) Value { return 1}(in))
}(
func(in Value) Value { return in }(in),
func(_ Value) Value { return 6}(in),
)
}
```
Clearly, there's some optimization to be done still.
## Results
While it's still not perfect, the new implementation is far and away better than
the old. This can be seen just in the performance of the fibonacci program:
```
# Previous VM
$ time ./eval "$(cat examples/fib.gg)" 10
55
real 0m8.737s
user 0m9.871s
sys 0m0.309s
```
```
# New VM
$ time ./eval "$(cat examples/fib.gg)" 50
12586269025
real 0m0.003s
user 0m0.003s
sys 0m0.000s
```
They're not even comparable: the new VM handles `fib(50)` in less time than the
old one needed for `fib(10)`.

View File

@ -1,89 +0,0 @@
---
title: >-
Why Do We Have WiFi Passwords?
description: >-
A possible UX improvement.
tags: tech
---
It's been longer than I'd like since the last post, and unfortunately I don't
have a ton that I can actually show for it. A lot of time has been spent on
cryptic-net, which is coming along great and even has a proper storage mechanism
now! But it also still has some data specific to our own network baked into the
code, so it can't be shown publicly yet.
-----
Since I don't have much I _can_ show, I thought I'd spend a post diving into a
thought I had the other day: **why do we have wifi passwords?**
The question is a bit facetious. Really what I want to ask is the adjacent
question: why do we use network names _and_ passwords for wifi networks? That
question doesn't make much sense standing alone though, so it wouldn't do as a
title.
In any case, what I'm proposing is that the vast majority of people don't need a
name/password authentication mechanism to secure their wifi network in a
practical way. Rather, most people could get along just fine with a secret token
mechanism.
In the case of wifi networks, a secret token mechanism might be better named a
secret _name_ mechanism. Under this mechanism a router would not broadcast its
own name to be discovered by the user's device; rather, the user would input the
name into their device themselves. Existing hidden wifi networks already work
this way, except they also require a password.
I'm not going to look at this from a technical or cryptographical perspective.
Hidden wifi networks work already, I assume that under the hood this wouldn't be
appreciably different. Instead I'd like to highlight how this change affects the
user experience of joining a wifi network.
The current experience is as follows:
* USER discovers the network name and password through external means.
* USER opens "add new wifi network" page on their device.
* USER finds network name in network list, possibly waiting or scrolling if
there are many networks.
* USER selects the network name.
* USER inputs password into text box.
* USER is connected to the wifi.
What could this look like if the network name was secret and there was no
password? There'd be no network list, so the whole process is much slimmer:
* USER discovers the secret network name through external means.
* USER opens "add new wifi network" page on their device.
* USER inputs secret name into text box.
* USER is connected to the wifi.
The result is a 33% reduction in the number of steps, and a 50% reduction in the
number of things the user has to know. The experience is virtually the same
across all other axes.
So the upside of this proposal is clear: a far better UX. But what are the
downsides? Losing a fun avenue of self-expression in the form of wifi names is
probably the most compelling one I've thought of. There are also corporate
environments to consider (as one always must), where it's more practical to
remove users from the network in a targeted way, by revoking accounts, than to
change the password for everyone anytime a user needs to be excluded.
Corporate offices can keep their usernames and passwords, I guess, and we
should come up with some other radio-based graffiti mechanism in any case. Let's
just get rid of these pointless extra steps!
-----
That's the post. Making this proposal into reality would require a movement far
larger than I care to organize, so we're just going to put this whole thing in
the "fun, pointless yak-shave" bucket and move along. If you happen to know the
architect of the next wifi protocol maybe slip this their way? Or just copy it
and take the credit yourself, that's fine by me.
What's coming next? I'm taking a break from cryptic to catch up on some
housekeeping in the self-hosted arena. I've got a brand new password manager I'd
like to try, as well as some motivation to finish getting my own email server
properly set up (it can currently only send mail). At some point I'd like to get
this blog gemini-ified too. Plus there are some services still running in
vestigial docker containers on my server; that needs to be remedied.
And somewhere in there I have to move too.

View File

@ -1,248 +0,0 @@
---
layout: page
---
<script async type="module" src="/assets/api.js"></script>
<style>
#messages {
max-height: 65vh;
overflow: auto;
padding-right: 2rem;
}
#messages .message {
border: 1px solid #AAA;
border-radius: 10px;
margin-bottom: 1rem;
padding: 2rem;
overflow: auto;
}
#messages .message .title {
font-weight: bold;
font-size: 120%;
}
#messages .message .secondaryTitle {
font-family: monospace;
color: #CCC;
}
#messages .message p {
font-family: monospace;
margin: 1rem 0 0 0;
}
</style>
<div id="messages"></div>
<span id="fail" style="color: red;"></span>
<script>
const messagesEl = document.getElementById("messages");
let messagesScrolledToBottom = true;
messagesEl.onscroll = () => {
const el = messagesEl;
messagesScrolledToBottom = el.scrollHeight == el.scrollTop + el.clientHeight;
};
function renderMessages(msgs) {
messagesEl.innerHTML = '';
msgs.forEach((msg) => {
const el = document.createElement("div");
el.className = "row message";
const elWithTextContents = (tag, body) => {
const el = document.createElement(tag);
el.appendChild(document.createTextNode(body));
return el;
};
const titleEl = document.createElement("div");
titleEl.className = "title";
el.appendChild(titleEl);
const userNameEl = elWithTextContents("span", msg.userID.name);
titleEl.appendChild(userNameEl);
const secondaryTitleEl = document.createElement("div");
secondaryTitleEl.className = "secondaryTitle";
el.appendChild(secondaryTitleEl);
const dt = new Date(msg.createdAt*1000);
const dtStr
= `${dt.getFullYear()}-${dt.getMonth()+1}-${dt.getDate()}`
+ ` ${dt.getHours()}:${dt.getMinutes()}:${dt.getSeconds()}`;
const userIDEl = elWithTextContents("span", `userID:${msg.userID.id} @ ${dtStr}`);
secondaryTitleEl.appendChild(userIDEl);
const bodyEl = document.createElement("p");
const bodyParts = msg.body.split("\n");
for (const i in bodyParts) {
if (i > 0) bodyEl.appendChild(document.createElement("br"));
bodyEl.appendChild(document.createTextNode(bodyParts[i]));
}
el.appendChild(bodyEl);
messagesEl.appendChild(el);
});
}
(async () => {
const failEl = document.getElementById("fail");
const setErr = (msg) => failEl.innerHTML = `${msg} (please refresh the page to retry)`;
try {
const api = await import("/assets/api.js");
const history = await api.call("/api/chat/global/history");
const msgs = history.messages;
// history returns msgs in time descending, but we display them in time
// ascending.
msgs.reverse();
const sinceID = (msgs.length > 0) ? msgs[msgs.length-1].id : "";
const ws = await api.ws("/api/chat/global/listen", {
params: { sinceID },
});
while (true) {
renderMessages(msgs);
// If the user was previously scrolled to the bottom then keep them
// there.
if (messagesScrolledToBottom) {
messagesEl.scrollTop = messagesEl.scrollHeight;
}
const msg = await ws.next();
msgs.push(msg.message);
renderMessages(msgs);
}
} catch (e) {
e = `Failed to fetch message history: ${e}`;
setErr(e);
console.error(e);
return;
}
})()
</script>
<style>
#append {
border: 1px dashed #AAA;
border-radius: 10px;
padding: 2rem;
}
#append #appendBody {
font-family: monospace;
}
#append #appendStatus {
color: red;
}
</style>
<form id="append">
<h5>New Message</h5>
<div class="row">
<div class="columns four">
<input class="u-full-width" placeholder="Name" id="appendName" type="text" />
<input class="u-full-width" placeholder="Secret" id="appendSecret" type="password" />
</div>
<div class="columns eight">
<p>
Your name is displayed alongside your message.
Your name+secret is used to generate your userID, which is also
displayed alongside your message.
Other users can validate two messages are from the same person
by comparing the messages' userID.
</p>
</div>
</div>
<div class="row">
<div class="columns twelve">
<textarea
style="font-family: monospace"
id="appendBody"
class="u-full-width"
placeholder="Well thought out statement goes here..."
></textarea>
</div>
</div>
<div class="row">
<div class="columns four">
<input class="u-full-width button-primary" id="appendSubmit" type="button" value="Submit" />
</div>
</div>
<span id="appendStatus"></span>
</form>
<script>
const append = document.getElementById("append");
const appendName = document.getElementById("appendName");
const appendSecret = document.getElementById("appendSecret");
const appendBody = document.getElementById("appendBody");
const appendSubmit = document.getElementById("appendSubmit");
const appendStatus = document.getElementById("appendStatus");
appendSubmit.onclick = async () => {
const appendSubmitOrigValue = appendSubmit.value;
appendSubmit.disabled = true;
appendSubmit.className = "";
appendSubmit.value = "Please hold...";
appendStatus.innerHTML = '';
try {
const api = await import("/assets/api.js");
await api.call('/api/chat/global/append', {
body: {
name: appendName.value,
password: appendSecret.value,
body: appendBody.value,
},
requiresPow: true,
});
appendBody.value = '';
} catch (e) {
appendStatus.innerHTML = e;
} finally {
appendSubmit.disabled = false;
appendSubmit.className = "button-primary";
appendSubmit.value = appendSubmitOrigValue;
}
};
</script>

View File

@ -1,114 +0,0 @@
---
layout: page
title: "Follow the Blog"
nofollow: true
---
<script async type="module" src="/assets/api.js"></script>
Here are your options for receiving updates about new blog posts:
## Option 1: Email
Email is by far my preferred option for notifying followers of new posts.
The entire email list system for this blog, from storing subscriber email
addresses to the email server which sends the notifications out, has been
designed from scratch and is completely self-hosted in my living room.
I solemnly swear that:
* You will never receive an email from this blog except to notify you of a new post.
* Your email will never be provided or sold to anyone else for any reason.
With all that said, if you'd like to receive an email every time a new blog post
is published, then input your email below and smash that subscribe button!
<style>
#emailStatus.success {
color: green;
}
#emailStatus.fail {
color: red;
}
</style>
<input type="email" placeholder="name@host.com" id="emailAddress" />
<input class="button-primary" type="submit" value="Subscribe" id="emailSubscribe" />
<span id="emailStatus"></span>
<script>
const emailAddress = document.getElementById("emailAddress");
const emailSubscribe = document.getElementById("emailSubscribe");
const emailSubscribeOrigValue = emailSubscribe.value;
const emailStatus = document.getElementById("emailStatus");
emailSubscribe.onclick = async () => {
const api = await import("/assets/api.js");
emailSubscribe.disabled = true;
emailSubscribe.className = "";
emailSubscribe.value = "Please hold...";
emailStatus.innerHTML = '';
try {
if (!window.isSecureContext) {
throw "The browser environment is not secure.";
}
await api.call('/api/mailinglist/subscribe', {
body: { email: emailAddress.value },
requiresPow: true,
});
emailStatus.className = "success";
emailStatus.innerHTML = "Verification email sent (check your spam folder)";
} catch (e) {
emailStatus.className = "fail";
emailStatus.innerHTML = e;
} finally {
emailSubscribe.disabled = false;
emailSubscribe.className = "button-primary";
emailSubscribe.value = emailSubscribeOrigValue;
}
};
</script>
(HINDSIGHT NOTE: We're like three posts in since I set the mailing list up, and
GMail is already marking me as spam. So... check your spam folders folks!)
## Option 2: RSS
RSS is the classic way to follow any blog. It comes from a time before
aggregators like reddit and twitter stole the show, when people felt capable of
managing their own content feeds. We should use it again.
To follow over RSS give any RSS reader the following URL...
<a href="{{site.url}}/feed.xml">{{site.url}}/feed.xml</a>
...and posts from this blog will show up in your RSS feed as soon as they are
published. There are literally thousands of RSS readers out there. Here are some
recommendations:
* [Google Chrome Browser Extension](https://chrome.google.com/webstore/detail/rss-feed-reader/pnjaodmkngahhkoihejjehlcdlnohgmp)
* [spaRSS](https://f-droid.org/en/packages/net.etuldan.sparss.floss/) is my
preferred android RSS reader, but you'll need to install
[f-droid](https://f-droid.org/) on your device to use it (a good thing to do
anyway, imo).
* [NetNewsWire](https://ranchero.com/netnewswire/) is a good reader for
iPhone/iPad/Mac devices, so I'm told. Their homepage description makes a much
better sales pitch for RSS than I ever could.

Binary files not shown: this commit also deleted 20 image files (ranging from 63 KiB to 37 MiB).
Some files were not shown because too many files have changed in this diff.