docker-tidy/dockertidy/garbage_collector.py

334 lines
12 KiB
Python
Raw Permalink Normal View History

2020-03-01 18:42:29 +01:00
#!/usr/bin/env python3
"""Remove unused docker containers and images."""
2015-07-01 00:33:43 +02:00
import fnmatch
2020-03-01 18:42:29 +01:00
from collections import namedtuple
2015-07-01 00:33:43 +02:00
import dateutil.parser
import docker.errors
import requests.exceptions
2020-11-16 01:02:32 +01:00
import docker
from dockertidy.config import SingleConfig
from dockertidy.logger import SingleLog
from dockertidy.parser import timedelta
2015-07-01 00:33:43 +02:00
2020-03-05 23:51:21 +01:00
class GarbageCollector:
    """Garbage collector object to handle cleanup tasks of container, images and volumes.

    The collector reads its settings from ``SingleConfig().config`` (keys used
    here: ``dry_run``, ``http_timeout`` and the ``gc`` sub-dict) and talks to
    the docker daemon through a low-level ``docker.APIClient``.
    """

    # This seems to be something docker uses for a null/zero date
    YEAR_ZERO = "0001-01-01T00:00:00Z"

    # Parsed form of one "key[=value]" exclude label; ``value`` is None when
    # the configured entry had no "=" part.
    ExcludeLabel = namedtuple("ExcludeLabel", ["key", "value"])

    def __init__(self):
        self.config = SingleConfig()
        self.log = SingleLog()
        self.logger = SingleLog().logger
        self.docker = self._get_docker_client()

    def cleanup_containers(self):
        """Identify old containers and remove them.

        Containers matching an exclude label are skipped. Nothing is removed
        when ``dry_run`` is set; the candidates are only logged.
        """
        config = self.config.config
        client = self.docker
        max_age = config["gc"]["max_container_age"]

        all_containers = self._get_all_containers()
        filtered_containers = self._filter_excluded_containers(all_containers)

        self.logger.info(
            "Removing containers older than '%s'",
            timedelta(max_age, dt_format="%Y-%m-%d, %H:%M:%S"),
        )

        # Compute the cutoff once so every container is judged against the
        # same date that was just logged.
        # NOTE(review): assumes dockertidy.parser.timedelta has no per-call
        # side effects - confirm.
        min_date = timedelta(max_age)

        for container_summary in reversed(list(filtered_containers)):
            container = self._api_call(
                client.inspect_container,
                container=container_summary["Id"],
            )
            # _api_call returns None when the inspect call failed.
            if not container or not self._should_remove_container(container, min_date):
                continue

            self.logger.info(
                "Removing container %s %s %s",
                container["Id"][:16],
                container.get("Name", "").lstrip("/"),
                container["State"]["FinishedAt"],
            )

            if not config["dry_run"]:
                self._api_call(
                    client.remove_container,
                    container=container["Id"],
                    v=True,
                )

    def _filter_excluded_containers(self, containers):
        """Drop containers that carry one of the configured exclude labels.

        Returns ``containers`` unchanged when no exclude labels are
        configured, otherwise a lazy iterable over the remaining containers.
        """
        if not self.config.config["gc"]["exclude_container_labels"]:
            return containers

        return (
            container
            for container in containers
            if not self._should_exclude_container_with_labels(container)
        )

    def _should_exclude_container_with_labels(self, container):
        """Return True if any configured exclude label matches ``container``.

        Keys and values are matched with ``fnmatch`` patterns. An exclude
        label without a value matches on the key alone.
        """
        labels = container.get("Labels")
        if not labels:
            return False

        for exclude_label in self.config.config["gc"]["exclude_container_labels"]:
            matching_keys = fnmatch.filter(labels, exclude_label.key)

            if not exclude_label.value:
                # Key-only pattern: any matching key is enough.
                if matching_keys:
                    return True
                continue

            label_values_to_check = [labels[matching_key] for matching_key in matching_keys]
            if fnmatch.filter(label_values_to_check, exclude_label.value):
                return True

        return False

    def _should_remove_container(self, container, min_date):
        """Return True if the inspected ``container`` is stopped and older than ``min_date``."""
        state = container.get("State", {})

        if state.get("Running"):
            return False

        if state.get("Ghost"):
            return True

        # Container was created, but never started
        if state.get("FinishedAt") == self.YEAR_ZERO:
            created_date = dateutil.parser.parse(container["Created"])
            return created_date < min_date

        finished_date = dateutil.parser.parse(state["FinishedAt"])
        return finished_date < min_date

    def _get_all_containers(self):
        """Return the summaries of all containers, including stopped ones."""
        client = self.docker
        self.logger.info("Getting all containers")
        containers = client.containers(all=True)
        self.logger.info("Found %s containers", len(containers))
        return containers

    def _get_all_images(self):
        """Return the summaries of all images known to the daemon."""
        client = self.docker
        self.logger.info("Getting all images")
        images = client.images()
        self.logger.info("Found %s images", len(images))
        return images

    def _get_dangling_volumes(self):
        """Return the list of dangling volumes (empty when there are none)."""
        client = self.docker
        self.logger.info("Getting dangling volumes")
        # The "Volumes" entry may be falsy (e.g. None); normalise to a list.
        volumes = client.volumes({"dangling": True})["Volumes"] or []
        self.logger.info("Found %s dangling volumes", len(volumes))
        return volumes

    def cleanup_images(self, exclude_set):
        """Identify old images and remove them.

        ``exclude_set`` is an iterable of ``fnmatch`` patterns; images with a
        matching repo tag are kept. Images used by any container are kept too.
        """
        # re-fetch container list so that we don't include removed containers
        client = self.docker
        config = self.config.config
        max_age = config["gc"]["max_image_age"]

        containers = self._get_all_containers()
        images = self._get_all_images()

        # NOTE(review): presumably true when the daemon API is older than
        # 1.21 - confirm against docker.utils.compare_version.
        if docker.utils.compare_version("1.21", client._version) < 0:
            image_tags_in_use = {container["Image"] for container in containers}
            images = self._filter_images_in_use(images, image_tags_in_use)
        else:
            # ImageID field was added in 1.21
            image_ids_in_use = {container["ImageID"] for container in containers}
            images = self._filter_images_in_use_by_id(images, image_ids_in_use)
        images = self._filter_excluded_images(images, exclude_set)

        self.logger.info(
            "Removing images older than '%s'",
            timedelta(max_age, dt_format="%Y-%m-%d, %H:%M:%S"),
        )

        # One cutoff for the whole run, matching the date logged above.
        min_date = timedelta(max_age)
        for image_summary in reversed(list(images)):
            self._remove_image(image_summary, min_date)

    def _filter_excluded_images(self, images, exclude_set):
        """Lazily drop images having a repo tag that matches a pattern in ``exclude_set``.

        Untagged images are never excluded.
        """

        def include_image(image_summary):
            image_tags = image_summary.get("RepoTags")
            if self._no_image_tags(image_tags):
                return True
            return not any(
                fnmatch.filter(image_tags, exclude_pattern) for exclude_pattern in exclude_set
            )

        return filter(include_image, images)

    def _filter_images_in_use(self, images, image_tags_in_use):
        """Lazily drop images whose tags appear in ``image_tags_in_use``."""

        def get_tag_set(image_summary):
            image_tags = image_summary.get("RepoTags")
            if self._no_image_tags(image_tags):
                # The repr of the image Id used by client.containers()
                return {"{id}:latest".format(id=image_summary["Id"][:12])}
            return set(image_tags)

        def image_not_in_use(image_summary):
            return not get_tag_set(image_summary) & image_tags_in_use

        return filter(image_not_in_use, images)

    def _filter_images_in_use_by_id(self, images, image_ids_in_use):
        """Lazily drop images whose ``Id`` appears in ``image_ids_in_use``."""

        def image_not_in_use(image_summary):
            return image_summary["Id"] not in image_ids_in_use

        return filter(image_not_in_use, images)

    def _is_image_old(self, image, min_date):
        """Return True if the inspected ``image`` was created before ``min_date``."""
        return dateutil.parser.parse(image["Created"]) < min_date

    def _no_image_tags(self, image_tags):
        """Return True if ``image_tags`` is empty or only the ``<none>:<none>`` placeholder."""
        return not image_tags or image_tags == ["<none>:<none>"]

    def _remove_image(self, image_summary, min_date):
        """Remove the image behind ``image_summary`` if it is older than ``min_date``.

        Only logs the candidate when ``dry_run`` is set.
        """
        config = self.config.config
        client = self.docker
        image = self._api_call(client.inspect_image, image=image_summary["Id"])

        # _api_call returns None when the inspect call failed.
        if not image or not self._is_image_old(image, min_date):
            return

        self.logger.info("Removing image %s", self._format_image(image, image_summary))

        if config["dry_run"]:
            return

        image_tags = image_summary.get("RepoTags")
        # If there are no tags, remove the id
        if self._no_image_tags(image_tags):
            self._api_call(client.remove_image, image=image_summary["Id"])
            return

        # Remove any repository tags so we don't hit 409 Conflict
        for image_tag in image_tags:
            self._api_call(client.remove_image, image=image_tag)

    def _remove_volume(self, volume):
        """Remove a single ``volume``; only logs it when ``dry_run`` is set."""
        config = self.config.config
        client = self.docker

        if not volume:
            return

        self.logger.info("Removing volume %s", volume["Name"])

        if config["dry_run"]:
            return

        self._api_call(client.remove_volume, name=volume["Name"])

    def cleanup_volumes(self):
        """Identify old volumes and remove them."""
        dangling_volumes = self._get_dangling_volumes()

        self.logger.info("Removing dangling volumes")
        for volume in reversed(dangling_volumes):
            self.logger.info("Removing dangling volume %s", volume["Name"])
            self._remove_volume(volume)

    def _api_call(self, func, **kwargs):
        """Call ``func(**kwargs)`` and return its result.

        Request timeouts and docker API errors are logged as warnings instead
        of being raised; ``None`` is returned in that case.
        """
        try:
            return func(**kwargs)
        except requests.exceptions.Timeout as e:
            self._log_api_failure("Failed to call", func, kwargs, e)
        except docker.errors.APIError as e:
            self._log_api_failure("Error calling", func, kwargs, e)
        return None

    def _log_api_failure(self, prefix, func, kwargs, error):
        """Log a warning describing a failed docker API call."""
        params = ",".join("{}={}".format(key, value) for key, value in kwargs.items())
        # Logger.warn is a deprecated alias; warning() is the supported name.
        self.logger.warning("%s %s %s %s", prefix, func.__name__, params, error)

    def _format_image(self, image, image_summary):
        """Return ``"<short id> <comma separated tags>"`` for log output."""
        tags = image_summary.get("RepoTags")
        tag_text = "" if self._no_image_tags(tags) else ", ".join(tags)
        return "{id} {tags}".format(id=image["Id"][:16], tags=tag_text)

    def _build_exclude_set(self):
        """Return the configured image exclude patterns as a set."""
        config = self.config.config
        return set(config["gc"]["exclude_images"] or [])

    def _format_exclude_labels(self):
        """Parse the configured ``key[=value]`` exclude labels into ``ExcludeLabel`` tuples.

        The parsed list replaces ``config["gc"]["exclude_container_labels"]``.
        Entries that are already ``ExcludeLabel`` instances are kept as they
        are, so calling this method more than once is safe.
        """
        config = self.config.config
        exclude_labels = []

        for exclude_label_arg in config["gc"]["exclude_container_labels"] or []:
            if isinstance(exclude_label_arg, self.ExcludeLabel):
                exclude_labels.append(exclude_label_arg)
                continue

            key, separator, value = exclude_label_arg.partition("=")
            exclude_labels.append(
                self.ExcludeLabel(
                    key=key,
                    # No "=" at all means "match on the key only".
                    value=value if separator else None,
                )
            )

        config["gc"]["exclude_container_labels"] = exclude_labels

    def _get_docker_client(self):
        """Create the low-level docker client, exiting with a message on failure."""
        config = self.config.config
        try:
            return docker.APIClient(version="auto", timeout=config["http_timeout"])
        except docker.errors.DockerException as e:
            self.log.sysexit_with_message("Can't create docker client\n{}".format(e))

    def run(self):
        """Garbage collector main method.

        Runs the container, image and volume cleanups that are enabled in the
        ``gc`` config section, or logs a warning when none is enabled.
        """
        self.logger.info("Start garbage collection")
        gc_config = self.config.config["gc"]
        self._format_exclude_labels()

        clean_containers = gc_config["max_container_age"]
        clean_images = gc_config["max_image_age"]
        clean_volumes = gc_config["dangling_volumes"]

        if not (clean_containers or clean_images or clean_volumes):
            self.logger.warning("Skipped, no arguments given")
            return

        if clean_containers:
            self.cleanup_containers()

        if clean_images:
            exclude_set = self._build_exclude_set()
            self.cleanup_images(exclude_set)

        if clean_volumes:
            self.cleanup_volumes()