Refactor pmuxlib to allow for restarting a single process within a set
This commit is contained in:
parent
b1bc3a1df2
commit
7f5c354d04
10
main.go
10
main.go
@ -1,7 +1,6 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
@ -29,19 +28,18 @@ func main() {
|
|||||||
panic(fmt.Sprintf("couldn't parse cfg file: %v", err))
|
panic(fmt.Sprintf("couldn't parse cfg file: %v", err))
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
p := pmuxlib.NewPmux(cfg, os.Stdout, os.Stderr)
|
||||||
go func() {
|
defer p.Stop()
|
||||||
|
|
||||||
sigCh := make(chan os.Signal, 2)
|
sigCh := make(chan os.Signal, 2)
|
||||||
signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM)
|
signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM)
|
||||||
|
|
||||||
<-sigCh
|
<-sigCh
|
||||||
cancel()
|
|
||||||
|
|
||||||
|
go func() {
|
||||||
<-sigCh
|
<-sigCh
|
||||||
fmt.Fprintln(os.Stderr, "forcefully exiting pmux process, there may be zombie child processes being left behind, good luck!")
|
fmt.Fprintln(os.Stderr, "forcefully exiting pmux process, there may be zombie child processes being left behind, good luck!")
|
||||||
os.Stderr.Sync()
|
os.Stderr.Sync()
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
pmuxlib.Run(ctx, os.Stdout, os.Stderr, cfg)
|
|
||||||
}
|
}
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
processes:
|
processes:
|
||||||
|
|
||||||
# each process must have a name and cmd.
|
# each process must have a name and cmd.
|
||||||
- name: pinger
|
pinger:
|
||||||
cmd: /bin/bash
|
cmd: /bin/bash
|
||||||
args:
|
args:
|
||||||
- "-c"
|
- "-c"
|
||||||
@ -37,7 +37,7 @@ processes:
|
|||||||
|
|
||||||
# This process will not immediately exit when pmux tells it to do so, but pmux
|
# This process will not immediately exit when pmux tells it to do so, but pmux
|
||||||
# will SIGKILL it after sigKillWait has elapsed.
|
# will SIGKILL it after sigKillWait has elapsed.
|
||||||
- name: stubborn-pinger
|
stubborn-pinger:
|
||||||
cmd: /bin/bash
|
cmd: /bin/bash
|
||||||
args:
|
args:
|
||||||
- "-c"
|
- "-c"
|
||||||
|
@ -3,25 +3,30 @@
|
|||||||
package pmuxlib
|
package pmuxlib
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"sync"
|
"sync"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Config struct {
|
type Config struct {
|
||||||
TimeFormat string `yaml:"timeFormat"`
|
TimeFormat string `yaml:"timeFormat"`
|
||||||
Processes []ProcessConfig `yaml:"processes"`
|
|
||||||
|
// Set of processes to run, keyed by their name.
|
||||||
|
Processes map[string]ProcessConfig `yaml:"processes"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run runs the given configuration as if this was a real pmux process. It will
|
// Pmux manages multiple child Processes. Methods on a Pmux instance are _not_
|
||||||
// block until the context is canceled and all child processes have been cleaned
|
// thread-safe.
|
||||||
// up.
|
//
|
||||||
func Run(
|
// Stop must be called on a Pmux before the program has exited, or there may be
|
||||||
ctx context.Context,
|
// a leftover zombie child process.
|
||||||
stdout, stderr io.Writer,
|
type Pmux struct {
|
||||||
cfg Config,
|
processes map[string]*Process
|
||||||
) {
|
sysLogger Logger
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewPmux starts a Pmux with the given configuration.
|
||||||
|
func NewPmux(cfg Config, stdout, stderr io.Writer) *Pmux {
|
||||||
stdoutLogger := newLogger(stdout, logSepStdout, cfg.TimeFormat)
|
stdoutLogger := newLogger(stdout, logSepStdout, cfg.TimeFormat)
|
||||||
defer stdoutLogger.Close()
|
defer stdoutLogger.Close()
|
||||||
|
|
||||||
@ -29,27 +34,51 @@ func Run(
|
|||||||
defer stderrLogger.Close()
|
defer stderrLogger.Close()
|
||||||
|
|
||||||
sysLogger := stderrLogger.withSep(logSepSys)
|
sysLogger := stderrLogger.withSep(logSepSys)
|
||||||
defer sysLogger.Println("exited gracefully, ciao!")
|
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
p := &Pmux{
|
||||||
defer wg.Wait()
|
processes: map[string]*Process{},
|
||||||
|
sysLogger: sysLogger,
|
||||||
|
}
|
||||||
|
|
||||||
for _, cfgProc := range cfg.Processes {
|
for name, cfgProc := range cfg.Processes {
|
||||||
wg.Add(1)
|
stdoutLogger := stdoutLogger.withPName(name)
|
||||||
go func(procCfg ProcessConfig) {
|
stderrLogger := stderrLogger.withPName(name)
|
||||||
defer wg.Done()
|
sysLogger := sysLogger.withPName(name)
|
||||||
|
p.processes[name] = NewProcess(
|
||||||
stdoutLogger := stdoutLogger.withPName(procCfg.Name)
|
cfgProc, stdoutLogger, stderrLogger, sysLogger,
|
||||||
stderrLogger := stderrLogger.withPName(procCfg.Name)
|
|
||||||
sysLogger := sysLogger.withPName(procCfg.Name)
|
|
||||||
|
|
||||||
sysLogger.Println("starting process")
|
|
||||||
defer sysLogger.Println("stopped process handler")
|
|
||||||
|
|
||||||
RunProcess(
|
|
||||||
ctx, stdoutLogger, stderrLogger, sysLogger, procCfg,
|
|
||||||
)
|
)
|
||||||
|
}
|
||||||
|
|
||||||
}(cfgProc)
|
return p
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Restart will block until the child process of the given name has been killed
|
||||||
|
// and a new one has been spawned. If there is no child of the given name then
|
||||||
|
// Restart panics.
|
||||||
|
func (p *Pmux) Restart(name string) {
|
||||||
|
proc, ok := p.processes[name]
|
||||||
|
if !ok {
|
||||||
|
panic(fmt.Sprintf("no process named %q", name))
|
||||||
|
}
|
||||||
|
proc.Restart()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop will block until all child processes have been killed. The Pmux should
|
||||||
|
// not be used again after this.
|
||||||
|
func (p *Pmux) Stop() {
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
|
p.sysLogger.Println("killing child processes")
|
||||||
|
for _, proc := range p.processes {
|
||||||
|
proc := proc
|
||||||
|
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
proc.Stop()
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
|
p.sysLogger.Println("exited gracefully, ciao!")
|
||||||
}
|
}
|
||||||
|
@ -14,12 +14,8 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ProcessConfig is used to configure a process via RunProcess.
|
// ProcessConfig is used to configure a process.
|
||||||
type ProcessConfig struct {
|
type ProcessConfig struct {
|
||||||
|
|
||||||
// Name of the process to be run. This only gets used by RunPmux.
|
|
||||||
Name string
|
|
||||||
|
|
||||||
// Cmd and Args describe the actual process to run.
|
// Cmd and Args describe the actual process to run.
|
||||||
Cmd string `yaml:"cmd"`
|
Cmd string `yaml:"cmd"`
|
||||||
Args []string `yaml:"args"`
|
Args []string `yaml:"args"`
|
||||||
@ -32,7 +28,7 @@ type ProcessConfig struct {
|
|||||||
Dir string `yaml:"dir"`
|
Dir string `yaml:"dir"`
|
||||||
|
|
||||||
// MinWait and MaxWait are the minimum and maximum amount of time between
|
// MinWait and MaxWait are the minimum and maximum amount of time between
|
||||||
// restarts that RunProcess will wait.
|
// restarts that Process will wait.
|
||||||
//
|
//
|
||||||
// MinWait defaults to 1 second.
|
// MinWait defaults to 1 second.
|
||||||
// MaxWait defaults to 64 seconds.
|
// MaxWait defaults to 64 seconds.
|
||||||
@ -40,7 +36,7 @@ type ProcessConfig struct {
|
|||||||
MaxWait time.Duration `yaml:"maxWait"`
|
MaxWait time.Duration `yaml:"maxWait"`
|
||||||
|
|
||||||
// SigKillWait is the amount of time after the process is sent a SIGINT
|
// SigKillWait is the amount of time after the process is sent a SIGINT
|
||||||
// before RunProcess sends it a SIGKILL.
|
// before a SIGKILL is sent.
|
||||||
//
|
//
|
||||||
// Defalts to 10 seconds.
|
// Defalts to 10 seconds.
|
||||||
SigKillWait time.Duration `yaml:"sigKillWait"`
|
SigKillWait time.Duration `yaml:"sigKillWait"`
|
||||||
@ -207,8 +203,11 @@ func RunProcessOnce(
|
|||||||
return cmd.ProcessState.ExitCode(), nil
|
return cmd.ProcessState.ExitCode(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// RunProcess runs a process (configured by ProcessConfig) until the context is
|
// Process is used to manage a running process. Methods on a Process are _not_
|
||||||
// canceled, at which point the process is killed and RunProcess returns.
|
// thread-safe.
|
||||||
|
//
|
||||||
|
// Stop must be called on a Process before the program has exited, or there may
|
||||||
|
// be a leftover zombie child process.
|
||||||
//
|
//
|
||||||
// The process will be restarted if it exits of its own accord. There will be a
|
// The process will be restarted if it exits of its own accord. There will be a
|
||||||
// brief wait time between each restart, with an exponential backoff mechanism
|
// brief wait time between each restart, with an exponential backoff mechanism
|
||||||
@ -216,50 +215,77 @@ func RunProcessOnce(
|
|||||||
//
|
//
|
||||||
// The stdout and stderr of the process will be written to the corresponding
|
// The stdout and stderr of the process will be written to the corresponding
|
||||||
// Loggers. Various runtime events will be written to the sysLogger.
|
// Loggers. Various runtime events will be written to the sysLogger.
|
||||||
func RunProcess(
|
type Process struct {
|
||||||
ctx context.Context,
|
cfg ProcessConfig
|
||||||
stdoutLogger, stderrLogger, sysLogger Logger,
|
stdoutLogger, stderrLogger, sysLogger Logger
|
||||||
cfg ProcessConfig,
|
|
||||||
) {
|
|
||||||
|
|
||||||
cfg = cfg.withDefaults()
|
stopFn context.CancelFunc
|
||||||
|
doneCh chan struct{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewProcess returns a new Process instance based on the given config.
|
||||||
|
func NewProcess(
|
||||||
|
cfg ProcessConfig, stdoutLogger, stderrLogger, sysLogger Logger,
|
||||||
|
) *Process {
|
||||||
|
p := &Process{
|
||||||
|
cfg: cfg.withDefaults(),
|
||||||
|
stdoutLogger: stdoutLogger,
|
||||||
|
stderrLogger: stderrLogger,
|
||||||
|
sysLogger: sysLogger,
|
||||||
|
}
|
||||||
|
p.run()
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Process) run() {
|
||||||
|
var ctx context.Context
|
||||||
|
ctx, p.stopFn = context.WithCancel(context.Background())
|
||||||
|
p.doneCh = make(chan struct{})
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer close(p.doneCh)
|
||||||
|
p.restartLoop(ctx)
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Process) restartLoop(ctx context.Context) {
|
||||||
var wait time.Duration
|
var wait time.Duration
|
||||||
|
|
||||||
for {
|
for {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
exitCode, err := RunProcessOnce(
|
exitCode, err := RunProcessOnce(
|
||||||
ctx,
|
ctx,
|
||||||
stdoutLogger, stderrLogger, sysLogger,
|
p.stdoutLogger, p.stderrLogger, p.sysLogger,
|
||||||
cfg,
|
p.cfg,
|
||||||
)
|
)
|
||||||
took := time.Since(start)
|
took := time.Since(start)
|
||||||
|
|
||||||
|
// TODO check if error was due to StartAfterFunc, change the log if so.
|
||||||
if err != nil {
|
if err != nil {
|
||||||
sysLogger.Printf("exited: %v", err)
|
p.sysLogger.Printf("exited: %v", err)
|
||||||
} else {
|
} else {
|
||||||
sysLogger.Printf("exit code: %d", exitCode)
|
p.sysLogger.Printf("exit code: %d", exitCode)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := ctx.Err(); err != nil {
|
if err := ctx.Err(); err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
for i := range cfg.NoRestartOn {
|
for i := range p.cfg.NoRestartOn {
|
||||||
if cfg.NoRestartOn[i] == exitCode {
|
if p.cfg.NoRestartOn[i] == exitCode {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
wait = ((wait * 2) - took).Truncate(time.Millisecond)
|
wait = ((wait * 2) - took).Truncate(time.Millisecond)
|
||||||
|
|
||||||
if wait < cfg.MinWait {
|
if wait < p.cfg.MinWait {
|
||||||
wait = cfg.MinWait
|
wait = p.cfg.MinWait
|
||||||
} else if wait > cfg.MaxWait {
|
} else if wait > p.cfg.MaxWait {
|
||||||
wait = cfg.MaxWait
|
wait = p.cfg.MaxWait
|
||||||
}
|
}
|
||||||
|
|
||||||
sysLogger.Printf("will restart process in %v", wait)
|
p.sysLogger.Printf("will restart process in %v", wait)
|
||||||
|
|
||||||
select {
|
select {
|
||||||
case <-time.After(wait):
|
case <-time.After(wait):
|
||||||
@ -268,3 +294,17 @@ func RunProcess(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Restart will block until the currently running child process has been killed
|
||||||
|
// and a new one has been spawned.
|
||||||
|
func (p *Process) Restart() {
|
||||||
|
p.Stop()
|
||||||
|
p.run()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop will block until the child process has been killed. The Process should
|
||||||
|
// not be used again after this.
|
||||||
|
func (p *Process) Stop() {
|
||||||
|
p.stopFn()
|
||||||
|
<-p.doneCh
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user