feat: add reaper sub command to cleanup faulty agents (#4)

This commit is contained in:
Robert Kaussow 2022-07-26 14:01:11 +02:00 committed by GitHub
parent be4cc986f7
commit c16e315630
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 189 additions and 15 deletions

View File

@ -1,2 +1,3 @@
drone-admin
subcommand
(a|A)utoscaler

View File

@ -48,16 +48,18 @@ USAGE:
drone-admin [global options] command [command options] [arguments...]
VERSION:
4039a9c
00d2c63
COMMANDS:
build manage builds
help, h Shows a list of commands or help for one command
build manage build
autoscaler manage autoscaler
help, h Shows a list of commands or help for one command
GLOBAL OPTIONS:
--dry-run disable api calls (default: false) [$DRONE_ADMIN_DRY_RUN]
--help, -h show help (default: false)
--log-level value log level (default: "info") [$DRONE_ADMIN_LOG_LEVEL]
--server value, -s value server address [$DRONE_ADMIN_SERVER]
--server value, -s value server address (accepts multiple inputs) [$DRONE_ADMIN_SERVER]
--token value, -t value server auth token [$DRONE_ADMIN_TOKEN]
--version, -v print the version (default: false)
```
@ -73,3 +75,13 @@ INFO[0001] skip 'example/repo_1', number of 9 builds lower than min value
INFO[0002] prune 1/105 builds from 'example/demo'
INFO[0002] prune 0/56 builds from 'example/cool_project'
```
### Cleanup autoscaler agents
When using the autoscaler, agents sometimes remain in error state in the DB (even if the Drone CI Reaper is enabled). This command tries to destroy agents in error state two times and forces the destruction on the third attempt. For this command, the `--server` flag must be set to the address of the autoscaler server(s).
```Shell
drone-admin --token my-secret-token --server https://drone-scaler.example.com autoscaler reaper
INFO[0000] lookup agents in error state error=1 ok=1 server="https://drone-scaler.example.com"
INFO[0000] destroy agent agent=agent-G8hHyA0A force=false server="https://drone-scaler.example.com" triage=1
```

View File

@ -0,0 +1,11 @@
package autoscaler
import "github.com/urfave/cli/v2"
// Command is the top-level "autoscaler" CLI command. It performs no action
// itself and only groups the autoscaler related sub commands.
var Command = &cli.Command{
	Name:        "autoscaler",
	Usage:       "manage autoscaler",
	Subcommands: []*cli.Command{&autoscalerReaperCmd},
}

View File

@ -0,0 +1,124 @@
package autoscaler
import (
"os"
"strings"
"github.com/drone/drone-go/drone"
"github.com/sirupsen/logrus"
"github.com/thegeeklab/drone-admin/admin/client"
"github.com/thegeeklab/drone-admin/admin/util"
"github.com/urfave/cli/v2"
)
// autoscalerReaperCmd defines the "reaper" sub command. Its only flag is the
// path of the state file, which autoscalerReaper uses to remember how often
// each agent has already been handled across invocations.
var autoscalerReaperCmd = cli.Command{
	Name:  "reaper",
	Usage: "find and kill agents in error state",
	Flags: []cli.Flag{
		&cli.StringFlag{
			Name:    "state-file",
			Usage:   "state file",
			Value:   "/tmp/droneclean.gob",
			EnvVars: []string{"DRONE_ADMIN_AUTOSCALER_REAPER_STATE_FILE"},
		},
	},
	Action: autoscalerReaper,
}
// autoscalerReaper is the action of the "reaper" sub command. For every
// autoscaler address passed via the global "server" flag it lists the servers
// that are not stopped, selects the ones in "error" state and asks the
// autoscaler to destroy them.
//
// A per-agent attempt counter is loaded from and written back to the file
// given by the "state-file" flag. An agent is destroyed without force on its
// first attempts and with force once the counter reaches forceAfter; its
// counter is then dropped from the state.
//
// When the global "dry-run" flag is set, agents are only logged: no destroy
// call is made and the state file is not written.
//
// It returns the first error encountered while reading the state, creating a
// client, listing servers, destroying an agent (404 responses are ignored) or
// writing the state.
func autoscalerReaper(c *cli.Context) error {
	// forceAfter is the attempt number on which destruction is forced.
	const forceAfter = 3

	statefile := c.String("state-file")
	scalers := c.StringSlice("server")
	dry := c.Bool("dry-run")
	state := map[string]int{}

	if dry {
		logrus.Info("dry-run enabled, no data will be removed")
	}

	// A missing state file is not an error: the counters simply start empty.
	if _, err := os.Stat(statefile); err == nil {
		if err := util.ReadGob(statefile, &state); err != nil {
			return err
		}
	}

	for _, scaler := range scalers {
		api, err := client.New(scaler, c.String("token"))
		if err != nil {
			return err
		}

		servers, err := getServers(api)
		if err != nil {
			return err
		}

		serversAll := len(servers)

		// Only agents in error state are candidates for destruction.
		servers = util.Filter(servers, func(s *drone.Server) bool {
			return s.State == "error"
		})

		// NOTE(review): "ok" carries the number of all non-stopped servers,
		// including the errored ones — confirm this is the intended meaning.
		searchFields := logrus.Fields{
			"server": scaler,
			"ok":     serversAll,
			"error":  len(servers),
		}

		logrus.WithFields(searchFields).Infof("lookup agents in error state")

		for _, s := range servers {
			state[s.Name]++
			triage := state[s.Name]

			// force is decided per agent so that one agent reaching its
			// final attempt does not force-destroy every agent after it.
			force := triage >= forceAfter
			if force {
				delete(state, s.Name)
			}

			foundFields := logrus.Fields{
				"server": scaler,
				"agent":  s.Name,
				"triage": triage,
				"force":  force,
			}

			logrus.WithFields(foundFields).Infof("destroy agent")

			if dry {
				continue
			}

			// A 404 means the agent is already gone, which is the desired
			// outcome, so it is not treated as a failure.
			err = serverDestroy(api, s.Name, force)
			if err != nil && !strings.Contains(err.Error(), "client error 404") {
				return err
			}
		}
	}

	if dry {
		return nil
	}

	return util.WriteGob(statefile, state)
}
// getServers fetches the server list from the given client and returns every
// server whose state is not "stopped". Errors from the list call are returned
// unchanged together with a nil slice.
func getServers(client drone.Client) ([]*drone.Server, error) {
	all, err := client.ServerList()
	if err != nil {
		return nil, err
	}

	notStopped := func(srv *drone.Server) bool {
		return srv.State != "stopped"
	}

	return util.Filter(all, notStopped), nil
}
// serverDestroy asks the client to delete the named server, passing the force
// flag through, and returns whatever error the delete call reports.
func serverDestroy(client drone.Client, server string, force bool) error {
	return client.ServerDelete(server, force)
}

View File

@ -29,17 +29,11 @@ var buidPruneCmd = cli.Command{
EnvVars: []string{"DRONE_ADMIN_BUILD_PRUNE_KEEP_MIN"},
Value: 10,
},
&cli.BoolFlag{
Name: "dry-run",
Usage: "disable api calls",
EnvVars: []string{"DRONE_ADMIN_BUILD_PRUNE_DRY_RUN"},
Value: false,
},
},
}
func buidPrune(c *cli.Context) error {
client, err := client.New(c.String("server"), c.String("token"))
client, err := client.New(c.StringSlice("server")[0], c.String("token"))
if err != nil {
return err
}

View File

@ -1,5 +1,10 @@
package util
import (
"encoding/gob"
"os"
)
func Filter[T any](vs []T, f func(T) bool) []T {
filtered := make([]T, 0)
for _, v := range vs {
@ -9,3 +14,23 @@ func Filter[T any](vs []T, f func(T) bool) []T {
}
return filtered
}
// WriteGob gob-encodes object into the file at filePath, creating the file or
// truncating an existing one.
//
// It returns the error from creating the file, from encoding, or — if both
// succeeded — from closing the file, so that a failed flush is not reported
// as success.
func WriteGob(filePath string, object any) (err error) {
	file, err := os.Create(filePath)
	if err != nil {
		return err
	}

	// Close exactly once; surface its error only when nothing failed earlier.
	defer func() {
		if cerr := file.Close(); err == nil {
			err = cerr
		}
	}()

	return gob.NewEncoder(file).Encode(object)
}
// ReadGob opens the file at filePath and gob-decodes its content into object,
// which must be a pointer to the value to fill. It returns the error from
// opening the file or from decoding; the file is closed before returning.
func ReadGob(filePath string, object interface{}) error {
	src, openErr := os.Open(filePath)
	if openErr != nil {
		return openErr
	}
	defer src.Close()

	return gob.NewDecoder(src).Decode(object)
}

View File

@ -8,6 +8,7 @@ import (
"github.com/joho/godotenv"
"github.com/sirupsen/logrus"
"github.com/thegeeklab/drone-admin/admin/autoscaler"
"github.com/thegeeklab/drone-admin/admin/build"
"github.com/urfave/cli/v2"
)
@ -41,17 +42,22 @@ func main() {
Name: "token",
Aliases: []string{"t"},
Usage: "server auth token",
EnvVars: []string{"DRONE_ADMIN_TOKEN"},
EnvVars: []string{"DRONE_ADMIN_TOKEN", "DRONE_TOKEN"},
Required: true,
},
&cli.StringFlag{
&cli.StringSliceFlag{
Name: "server",
Aliases: []string{"s"},
Usage: "server address",
EnvVars: []string{"DRONE_ADMIN_SERVER"},
EnvVars: []string{"DRONE_ADMIN_SERVER", "DRONE_SERVER"},
Required: true,
},
&cli.BoolFlag{
Name: "dry-run",
Usage: "disable none-read api calls",
EnvVars: []string{"DRONE_ADMIN_DRY_RUN"},
Value: false,
},
},
Before: func(ctx *cli.Context) error {
lvl, err := logrus.ParseLevel(ctx.String("log-level"))
@ -64,6 +70,7 @@ func main() {
},
Commands: []*cli.Command{
build.Command,
autoscaler.Command,
},
}