diff --git a/.dictionary b/.dictionary index d37e9fb..ae6042d 100644 --- a/.dictionary +++ b/.dictionary @@ -1,2 +1,3 @@ drone-admin subcommand +(a|A)utoscaler diff --git a/_docs/_index.md b/_docs/_index.md index a0db801..6bc5c32 100644 --- a/_docs/_index.md +++ b/_docs/_index.md @@ -48,16 +48,18 @@ USAGE: drone-admin [global options] command [command options] [arguments...] VERSION: - 4039a9c + 00d2c63 COMMANDS: - build manage builds - help, h Shows a list of commands or help for one command + build manage build + autoscaler manage autoscaler + help, h Shows a list of commands or help for one command GLOBAL OPTIONS: + --dry-run disable api calls (default: false) [$DRONE_ADMIN_DRY_RUN] --help, -h show help (default: false) --log-level value log level (default: "info") [$DRONE_ADMIN_LOG_LEVEL] - --server value, -s value server address [$DRONE_ADMIN_SERVER] + --server value, -s value server address (accepts multiple inputs) [$DRONE_ADMIN_SERVER] --token value, -t value server auth token [$DRONE_ADMIN_TOKEN] --version, -v print the version (default: false) ``` @@ -73,3 +75,13 @@ INFO[0001] skip 'example/repo_1', number of 9 builds lower than min value INFO[0002] prune 1/105 builds from 'example/demo' INFO[0002] prune 0/56 builds from 'example/cool_project' ``` + +### Cleanup autoscaler agents + +When using the autoscaler, agents sometimes remain in error state in the DB (even if the Drone CI Reaper is enabled). This command tries to destroy agents in error state two times and forces it on the third attempt. For this command the `--server` flag must be set to the address of the autoscaler server(s). 
+
+```Shell
+drone-admin --token my-secret-token --server https://drone-scaler.example.com autoscaler reaper
+INFO[0000] lookup agents in error state error=1 ok=1 server="https://drone-scaler.example.com"
+INFO[0000] destroy agent agent=agent-G8hHyA0A force=false server="https://drone-scaler.example.com" triage=1
+```
diff --git a/admin/autoscaler/autoscaler.go b/admin/autoscaler/autoscaler.go
new file mode 100644
index 0000000..44bf070
--- /dev/null
+++ b/admin/autoscaler/autoscaler.go
@@ -0,0 +1,12 @@
+package autoscaler
+
+import "github.com/urfave/cli/v2"
+
+// Command is the "autoscaler" command, the entry point for its subcommands.
+var Command = &cli.Command{
+	Name:  "autoscaler",
+	Usage: "manage autoscaler",
+	Subcommands: []*cli.Command{
+		&autoscalerReaperCmd,
+	},
+}
diff --git a/admin/autoscaler/autoscaler_reaper.go b/admin/autoscaler/autoscaler_reaper.go
new file mode 100644
index 0000000..cdaf9fb
--- /dev/null
+++ b/admin/autoscaler/autoscaler_reaper.go
@@ -0,0 +1,138 @@
+package autoscaler
+
+import (
+	"os"
+	"strings"
+
+	"github.com/drone/drone-go/drone"
+	"github.com/sirupsen/logrus"
+	"github.com/thegeeklab/drone-admin/admin/client"
+	"github.com/thegeeklab/drone-admin/admin/util"
+	"github.com/urfave/cli/v2"
+)
+
+// forceAfter is the number of reaper runs an agent has to be seen in error
+// state before its removal is forced.
+const forceAfter = 3
+
+// autoscalerReaperCmd defines the "reaper" subcommand and its flags.
+var autoscalerReaperCmd = cli.Command{
+	Name:   "reaper",
+	Usage:  "find and kill agents in error state",
+	Action: autoscalerReaper,
+	Flags: []cli.Flag{
+		&cli.StringFlag{
+			Name:    "state-file",
+			Usage:   "state file",
+			EnvVars: []string{"DRONE_ADMIN_AUTOSCALER_REAPER_STATE_FILE"},
+			Value:   "/tmp/droneclean.gob",
+		},
+	},
+}
+
+// autoscalerReaper destroys agents in error state on every configured
+// autoscaler server. The number of attempts per agent is persisted in the
+// state file between runs; once an agent reaches forceAfter attempts its
+// removal is forced and its counter is dropped.
+func autoscalerReaper(c *cli.Context) error {
+	statefile := c.String("state-file")
+	scalers := c.StringSlice("server")
+	dry := c.Bool("dry-run")
+	state := map[string]int{}
+
+	if dry {
+		logrus.Info("dry-run enabled, no data will be removed")
+	}
+
+	// A missing state file is not an error, the counters just start at zero.
+	if _, err := os.Stat(statefile); err == nil {
+		if err := util.ReadGob(statefile, &state); err != nil {
+			return err
+		}
+	}
+
+	for _, scaler := range scalers {
+		api, err := client.New(scaler, c.String("token"))
+		if err != nil {
+			return err
+		}
+
+		servers, err := getServers(api)
+		if err != nil {
+			return err
+		}
+
+		serversAll := len(servers)
+		servers = util.Filter(servers, func(s *drone.Server) bool {
+			return s.State == "error"
+		})
+
+		searchFields := logrus.Fields{
+			"server": scaler,
+			"ok":     serversAll,
+			"error":  len(servers),
+		}
+		logrus.WithFields(searchFields).Infof("lookup agents in error state")
+
+		for _, s := range servers {
+			state[s.Name]++
+			triage := state[s.Name]
+
+			// Decide per agent, so that one forced removal does not turn
+			// every later removal in this run into a forced one as well.
+			force := triage >= forceAfter
+			if force {
+				delete(state, s.Name)
+			}
+
+			foundFields := logrus.Fields{
+				"server": scaler,
+				"agent":  s.Name,
+				"triage": triage,
+				"force":  force,
+			}
+			logrus.WithFields(foundFields).Infof("destroy agent")
+
+			if dry {
+				continue
+			}
+
+			// Errors containing "client error 404" are ignored, presumably
+			// because the agent is already gone.
+			err = serverDestroy(api, s.Name, force)
+			if err != nil && !strings.Contains(err.Error(), "client error 404") {
+				return err
+			}
+		}
+	}
+
+	// Dry runs do not persist the attempt counters.
+	if !dry {
+		if err := util.WriteGob(statefile, state); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// getServers returns all servers reported by the autoscaler that are not in
+// "stopped" state.
+func getServers(client drone.Client) ([]*drone.Server, error) {
+	servers, err := client.ServerList()
+	if err != nil {
+		return nil, err
+	}
+
+	servers = util.Filter(servers, func(s *drone.Server) bool {
+		return s.State != "stopped"
+	})
+
+	return servers, nil
+}
+
+// serverDestroy deletes the named server through the autoscaler API; force is
+// passed on to the API call unchanged.
+func serverDestroy(client drone.Client, server string, force bool) error {
+	return client.ServerDelete(server, force)
+}
diff --git a/admin/build/build_prune.go b/admin/build/build_prune.go
index c77e0a9..f9010f5 100644
--- a/admin/build/build_prune.go
+++ b/admin/build/build_prune.go
@@ -29,17 +29,11 @@ var buidPruneCmd = cli.Command{
 			EnvVars: []string{"DRONE_ADMIN_BUILD_PRUNE_KEEP_MIN"},
 			Value:   10,
 		},
-		&cli.BoolFlag{
-			Name:    "dry-run",
-			Usage:   "disable api calls",
-			EnvVars: []string{"DRONE_ADMIN_BUILD_PRUNE_DRY_RUN"},
-			Value:   false,
-		},
 	},
 }
 
 func buidPrune(c *cli.Context) error {
-	client, err := client.New(c.String("server"), c.String("token"))
+	client, err := client.New(c.StringSlice("server")[0], c.String("token"))
 	if err != nil {
 		return err
 	}
diff --git a/admin/util/util.go b/admin/util/util.go
index 30bdef1..5e759a8 100644
--- a/admin/util/util.go
+++ b/admin/util/util.go
@@ -1,5 +1,10 @@
 package util
 
+import (
+	"encoding/gob"
+	"os"
+)
+
 func Filter[T any](vs []T, f func(T) bool) []T {
 	filtered := make([]T, 0)
 	for _, v := range vs {
@@ -9,3 +14,32 @@ func Filter[T any](vs []T, f func(T) bool) []T {
 	}
 	return filtered
 }
+
+// WriteGob gob-encodes object into the file at filePath, creating the file or
+// truncating an existing one. An error from closing the file is returned too,
+// as it can indicate that the data was not written completely.
+func WriteGob(filePath string, object interface{}) error {
+	file, err := os.Create(filePath)
+	if err != nil {
+		return err
+	}
+
+	if err := gob.NewEncoder(file).Encode(object); err != nil {
+		file.Close()
+		return err
+	}
+
+	return file.Close()
+}
+
+// ReadGob decodes the gob-encoded file at filePath into object, which has to
+// be a pointer to the value to fill.
+func ReadGob(filePath string, object interface{}) error {
+	file, err := os.Open(filePath)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+
+	return gob.NewDecoder(file).Decode(object)
+}
diff --git a/cmd/drone-admin/main.go b/cmd/drone-admin/main.go
index 070bba0..3892064 100644
--- a/cmd/drone-admin/main.go
+++ b/cmd/drone-admin/main.go
@@ -8,6 +8,7 @@ import (
 
 	"github.com/joho/godotenv"
 	"github.com/sirupsen/logrus"
+	"github.com/thegeeklab/drone-admin/admin/autoscaler"
 	"github.com/thegeeklab/drone-admin/admin/build"
 	"github.com/urfave/cli/v2"
 )
@@ -41,17 +42,22 @@ func main() {
 				Name:     "token",
 				Aliases:  []string{"t"},
 				Usage:    "server auth token",
-				EnvVars:  []string{"DRONE_ADMIN_TOKEN"},
+				EnvVars:  []string{"DRONE_ADMIN_TOKEN", "DRONE_TOKEN"},
 				Required: true,
 			},
-
-			&cli.StringFlag{
+			&cli.StringSliceFlag{
 				Name:     "server",
 				Aliases:  []string{"s"},
 				Usage:    "server address",
-				EnvVars:  []string{"DRONE_ADMIN_SERVER"},
+				EnvVars:  []string{"DRONE_ADMIN_SERVER", "DRONE_SERVER"},
 				Required: true,
 			},
+			&cli.BoolFlag{
+				Name:    "dry-run",
+				Usage:   "disable none-read api calls",
+				EnvVars: []string{"DRONE_ADMIN_DRY_RUN"},
+				Value:   false,
+			},
 		},
 		Before: func(ctx *cli.Context) error {
 			lvl, err := logrus.ParseLevel(ctx.String("log-level"))
@@ -64,6 +70,7 @@ func main() {
 		},
 		Commands: []*cli.Command{
 			build.Command,
+			autoscaler.Command,
 		},
 	}
 