mirror of
https://github.com/thegeeklab/drone-admin.git
synced 2024-11-23 04:40:39 +00:00
feat: add reaper sub command to cleanup faulty agents (#4)
This commit is contained in:
parent
be4cc986f7
commit
c16e315630
@ -1,2 +1,3 @@
|
||||
drone-admin
|
||||
subcommand
|
||||
(a|A)utoscaler
|
||||
|
@ -48,16 +48,18 @@ USAGE:
|
||||
drone-admin [global options] command [command options] [arguments...]
|
||||
|
||||
VERSION:
|
||||
4039a9c
|
||||
00d2c63
|
||||
|
||||
COMMANDS:
|
||||
build manage builds
|
||||
help, h Shows a list of commands or help for one command
|
||||
build manage build
|
||||
autoscaler manage autoscaler
|
||||
help, h Shows a list of commands or help for one command
|
||||
|
||||
GLOBAL OPTIONS:
|
||||
--dry-run disable api calls (default: false) [$DRONE_ADMIN_DRY_RUN]
|
||||
--help, -h show help (default: false)
|
||||
--log-level value log level (default: "info") [$DRONE_ADMIN_LOG_LEVEL]
|
||||
--server value, -s value server address [$DRONE_ADMIN_SERVER]
|
||||
--server value, -s value server address (accepts multiple inputs) [$DRONE_ADMIN_SERVER]
|
||||
--token value, -t value server auth token [$DRONE_ADMIN_TOKEN]
|
||||
--version, -v print the version (default: false)
|
||||
```
|
||||
@ -73,3 +75,13 @@ INFO[0001] skip 'example/repo_1', number of 9 builds lower than min value
|
||||
INFO[0002] prune 1/105 builds from 'example/demo'
|
||||
INFO[0002] prune 0/56 builds from 'example/cool_project'
|
||||
```
|
||||
|
||||
### Cleanup autoscaler agents
|
||||
|
||||
When using the autoscaler, agents sometimes remain in an error state in the DB (even if the Drone CI Reaper is enabled). This command tries to destroy agents in error state gracefully on the first two attempts and forces the removal on the third attempt. For this command, the `--server` flag must be set to the address of the autoscaler server(s).
|
||||
|
||||
```Shell
|
||||
drone-admin --token my-secret-token --server https://drone-scaler.example.com autoscaler reaper
|
||||
INFO[0000] lookup agents in error state error=1 ok=1 server="https://drone-scaler.example.com"
|
||||
INFO[0000] destroy agent agent=agent-G8hHyA0A force=false server="https://drone-scaler.example.com" triage=1
|
||||
```
|
||||
|
11
admin/autoscaler/autoscaler.go
Normal file
11
admin/autoscaler/autoscaler.go
Normal file
@ -0,0 +1,11 @@
|
||||
package autoscaler
|
||||
|
||||
import "github.com/urfave/cli/v2"
|
||||
|
||||
var Command = &cli.Command{
|
||||
Name: "autoscaler",
|
||||
Usage: "manage autoscaler",
|
||||
Subcommands: []*cli.Command{
|
||||
&autoscalerReaperCmd,
|
||||
},
|
||||
}
|
124
admin/autoscaler/autoscaler_reaper.go
Normal file
124
admin/autoscaler/autoscaler_reaper.go
Normal file
@ -0,0 +1,124 @@
|
||||
package autoscaler
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/drone/drone-go/drone"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/thegeeklab/drone-admin/admin/client"
|
||||
"github.com/thegeeklab/drone-admin/admin/util"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
var autoscalerReaperCmd = cli.Command{
|
||||
Name: "reaper",
|
||||
Usage: "find and kill agents in error state",
|
||||
Action: autoscalerReaper,
|
||||
Flags: []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "state-file",
|
||||
Usage: "state file",
|
||||
EnvVars: []string{"DRONE_ADMIN_AUTOSCALER_REAPER_STATE_FILE"},
|
||||
Value: "/tmp/droneclean.gob",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
func autoscalerReaper(c *cli.Context) error {
|
||||
statefile := c.String("state-file")
|
||||
scaler := c.StringSlice("server")
|
||||
dry := c.Bool("dry-run")
|
||||
state := map[string]int{}
|
||||
force := false
|
||||
|
||||
if dry {
|
||||
logrus.Info("dry-run enabled, no data will be removed")
|
||||
}
|
||||
|
||||
if _, err := os.Stat(statefile); err == nil {
|
||||
err = util.ReadGob(statefile, &state)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, scaler := range scaler {
|
||||
client, err := client.New(scaler, c.String("token"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
servers, err := getServers(client)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
serversAll := len(servers)
|
||||
servers = util.Filter(servers, func(s *drone.Server) bool {
|
||||
return s.State == "running"
|
||||
})
|
||||
|
||||
searchFields := logrus.Fields{
|
||||
"server": scaler,
|
||||
"ok": serversAll,
|
||||
"error": len(servers),
|
||||
}
|
||||
logrus.WithFields(searchFields).Infof("lookup agents in error state")
|
||||
|
||||
for _, s := range servers {
|
||||
state[s.Name]++
|
||||
triage := state[s.Name]
|
||||
|
||||
if state[s.Name] == 3 {
|
||||
force = true
|
||||
delete(state, s.Name)
|
||||
|
||||
}
|
||||
|
||||
foundFields := logrus.Fields{
|
||||
"server": scaler,
|
||||
"agent": s.Name,
|
||||
"triage": triage,
|
||||
"force": force,
|
||||
}
|
||||
logrus.WithFields(foundFields).Infof("destroy agent")
|
||||
if !dry {
|
||||
err = serverDestroy(client, s.Name, force)
|
||||
if err != nil && !strings.Contains(err.Error(), "client error 404") {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !dry {
|
||||
err := util.WriteGob(statefile, state)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getServers(client drone.Client) ([]*drone.Server, error) {
|
||||
servers, err := client.ServerList()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
servers = util.Filter(servers, func(s *drone.Server) bool {
|
||||
return s.State != "stopped"
|
||||
})
|
||||
|
||||
return servers, nil
|
||||
}
|
||||
|
||||
func serverDestroy(client drone.Client, server string, force bool) error {
|
||||
err := client.ServerDelete(server, force)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
@ -29,17 +29,11 @@ var buidPruneCmd = cli.Command{
|
||||
EnvVars: []string{"DRONE_ADMIN_BUILD_PRUNE_KEEP_MIN"},
|
||||
Value: 10,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "dry-run",
|
||||
Usage: "disable api calls",
|
||||
EnvVars: []string{"DRONE_ADMIN_BUILD_PRUNE_DRY_RUN"},
|
||||
Value: false,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
func buidPrune(c *cli.Context) error {
|
||||
client, err := client.New(c.String("server"), c.String("token"))
|
||||
client, err := client.New(c.StringSlice("server")[0], c.String("token"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -1,5 +1,10 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"encoding/gob"
|
||||
"os"
|
||||
)
|
||||
|
||||
func Filter[T any](vs []T, f func(T) bool) []T {
|
||||
filtered := make([]T, 0)
|
||||
for _, v := range vs {
|
||||
@ -9,3 +14,23 @@ func Filter[T any](vs []T, f func(T) bool) []T {
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
|
||||
// WriteGob gob-encodes object into the file at filePath, creating the file
// or truncating an existing one.
//
// It returns the error from creating the file, from encoding, or, when
// both succeeded, from closing the file, so that a write which only fails
// on close is still reported to the caller.
func WriteGob(filePath string, object any) (err error) {
	file, err := os.Create(filePath)
	if err != nil {
		return err
	}

	defer func() {
		// Keep the encode error if there is one; otherwise surface Close.
		if cerr := file.Close(); err == nil {
			err = cerr
		}
	}()

	return gob.NewEncoder(file).Encode(object)
}
|
||||
|
||||
// ReadGob opens the file at filePath and gob-decodes its content into
// object, which must be a pointer to a value compatible with the encoded
// data.
//
// It returns the error from opening the file or from decoding.
func ReadGob(filePath string, object any) error {
	file, err := os.Open(filePath)
	if err != nil {
		return err
	}
	// The file is only read, so a Close error carries no information
	// the caller could act on.
	defer file.Close()

	return gob.NewDecoder(file).Decode(object)
}
|
||||
|
@ -8,6 +8,7 @@ import (
|
||||
|
||||
"github.com/joho/godotenv"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/thegeeklab/drone-admin/admin/autoscaler"
|
||||
"github.com/thegeeklab/drone-admin/admin/build"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
@ -41,17 +42,22 @@ func main() {
|
||||
Name: "token",
|
||||
Aliases: []string{"t"},
|
||||
Usage: "server auth token",
|
||||
EnvVars: []string{"DRONE_ADMIN_TOKEN"},
|
||||
EnvVars: []string{"DRONE_ADMIN_TOKEN", "DRONE_TOKEN"},
|
||||
Required: true,
|
||||
},
|
||||
|
||||
&cli.StringFlag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "server",
|
||||
Aliases: []string{"s"},
|
||||
Usage: "server address",
|
||||
EnvVars: []string{"DRONE_ADMIN_SERVER"},
|
||||
EnvVars: []string{"DRONE_ADMIN_SERVER", "DRONE_SERVER"},
|
||||
Required: true,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "dry-run",
|
||||
Usage: "disable none-read api calls",
|
||||
EnvVars: []string{"DRONE_ADMIN_DRY_RUN"},
|
||||
Value: false,
|
||||
},
|
||||
},
|
||||
Before: func(ctx *cli.Context) error {
|
||||
lvl, err := logrus.ParseLevel(ctx.String("log-level"))
|
||||
@ -64,6 +70,7 @@ func main() {
|
||||
},
|
||||
Commands: []*cli.Command{
|
||||
build.Command,
|
||||
autoscaler.Command,
|
||||
},
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user