A dumb simple user-space process manager, with internals exposed as a usable go package.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
pmux/pmuxproc/pmuxproc.go

232 lines
5.2 KiB

// Package pmuxproc implements the process management aspects of the pmux
// process.
package pmuxproc
import (
"bufio"
"context"
"errors"
"fmt"
"io"
"os"
"os/exec"
"strings"
"sync"
"time"
)
// Config is used to configure a process via RunProcess.
type Config struct {
// Cmd and Args describe the actual process to run.
Cmd string `yaml:"cmd"`
Args []string `yaml:"args"`
// Env describes the environment variables to set on the process.
Env map[string]string `yaml:"env"`
// Dir is the directory the process will be run in. If not set then the
// process is run in the same directory as this parent process.
Dir string `yaml:"dir"`
// MinWait and MaxWait are the minimum and maximum amount of time between
// restarts that RunProcess will wait.
//
// MinWait defaults to 1 second.
// MaxWait defaults to 64 seconds.
MinWait time.Duration `yaml:"minWait"`
MaxWait time.Duration `yaml:"maxWait"`
// SigKillWait is the amount of time after the process is sent a SIGINT
// before RunProcess sends it a SIGKILL.
//
// Defalts to 10 seconds.
SigKillWait time.Duration `yaml:"sigKillWait"`
// NoRestartOn indicates which exit codes should result in the process not
// being restarted any further.
NoRestartOn []int `yaml:"no_restart_on"`
}
func (cfg Config) withDefaults() Config {
if cfg.MinWait == 0 {
cfg.MinWait = 1 * time.Second
}
if cfg.MaxWait == 0 {
cfg.MaxWait = 64 * time.Second
}
if cfg.SigKillWait == 0 {
cfg.SigKillWait = 10 * time.Second
}
return cfg
}
// RunProcessOnce runs the process described by the Config (though it doesn't
// use all fields from the Config). The process is killed if the context is
// canceled. The exit status of the process is returned, or -1 if the process
// was never started.
//
// It returns nil if the process exits normally with a zero status. It returns
// an error otherwise.
//
// The stdout and stderr of the process will be written to the corresponding
// Loggers. Various runtime events will be written to the sysLogger.
func RunProcessOnce(
ctx context.Context,
stdoutLogger, stderrLogger, sysLogger Logger,
cfg Config,
) (
int, error,
) {
cfg = cfg.withDefaults()
var wg sync.WaitGroup
fwdOutPipe := func(logger Logger, r io.Reader) {
wg.Add(1)
go func() {
defer wg.Done()
bufR := bufio.NewReader(r)
for {
line, err := bufR.ReadString('\n')
if errors.Is(err, io.EOF) {
return
} else if err != nil {
logger.Printf("reading output: %v", err)
return
}
logger.Println(strings.TrimSuffix(line, "\n"))
}
}()
}
cmd := exec.Command(cfg.Cmd, cfg.Args...)
cmd.Dir = cfg.Dir
cmd.Env = os.Environ()
for k, v := range cfg.Env {
cmd.Env = append(cmd.Env, k+"="+v)
}
stdout, err := cmd.StdoutPipe()
if err != nil {
return -1, fmt.Errorf("getting stdout pipe: %w", err)
}
defer stdout.Close()
stderr, err := cmd.StderrPipe()
if err != nil {
return -1, fmt.Errorf("getting stderr pipe: %w", err)
}
defer stderr.Close()
fwdOutPipe(stdoutLogger, stdout)
fwdOutPipe(stderrLogger, stderr)
if err := cmd.Start(); err != nil {
return -1, fmt.Errorf("starting process: %w", err)
}
// go-routine which will sent interrupt if the context is cancelled. Also
// waits on a secondary channel, which is closed when this function returns,
// in order to ensure this go-routine always gets cleaned up.
stopCh := make(chan struct{})
defer close(stopCh)
go func(proc *os.Process) {
select {
case <-ctx.Done():
_ = proc.Signal(os.Interrupt)
case <-stopCh:
return
}
select {
case <-time.After(cfg.SigKillWait):
sysLogger.Println("forcefully killing process")
_ = proc.Signal(os.Kill)
case <-stopCh:
return
}
}(cmd.Process)
wg.Wait()
err = cmd.Wait()
exitCode := cmd.ProcessState.ExitCode()
if err != nil {
return exitCode, fmt.Errorf("process exited: %w", err)
}
return exitCode, nil
}
// RunProcess is a process (configured by Config) until the context is canceled,
// at which point the process is killed and RunProcess returns.
//
// The process will be restarted if it exits of its own accord. There will be a
// brief wait time between each restart, with an exponential backup mechanism so
// that the wait time increases upon repeated restarts.
//
// The stdout and stderr of the process will be written to the corresponding
// Loggers. Various runtime events will be written to the sysLogger.
func RunProcess(
ctx context.Context,
stdoutLogger, stderrLogger, sysLogger Logger,
cfg Config,
) {
cfg = cfg.withDefaults()
var wait time.Duration
for {
start := time.Now()
exitCode, err := RunProcessOnce(
ctx,
stdoutLogger, stderrLogger, sysLogger,
cfg,
)
took := time.Since(start)
if err != nil {
sysLogger.Printf("exit code %d, %v", exitCode, err)
} else {
sysLogger.Println("exit code 0")
}
if err := ctx.Err(); err != nil {
return
}
for i := range cfg.NoRestartOn {
if cfg.NoRestartOn[i] == exitCode {
return
}
}
wait = ((wait * 2) - took).Truncate(time.Millisecond)
if wait < cfg.MinWait {
wait = cfg.MinWait
} else if wait > cfg.MaxWait {
wait = cfg.MaxWait
}
sysLogger.Printf("will restart process in %v", wait)
select {
case <-time.After(wait):
case <-ctx.Done():
return
}
}
}