docker-tidy/dockertidy/GarbageCollector.py

307 lines
9.2 KiB
Python
Raw Normal View History

2020-03-01 17:42:29 +00:00
#!/usr/bin/env python3
"""Remove unused docker containers and images."""
2015-06-30 22:33:43 +00:00
import fnmatch
2015-06-30 22:33:43 +00:00
import logging
import sys
2020-03-01 17:42:29 +00:00
from collections import namedtuple
2015-06-30 22:33:43 +00:00
import dateutil.parser
import docker
import docker.errors
import requests.exceptions
from docker.utils import kwargs_from_env
2015-06-30 22:33:43 +00:00
2020-03-05 22:51:21 +00:00
from dockertidy.Config import SingleConfig
from dockertidy.Logger import SingleLog
2015-06-30 22:33:43 +00:00
# This seems to be something docker uses for a null/zero date
YEAR_ZERO = "0001-01-01T00:00:00Z"
2020-03-01 17:42:29 +00:00
ExcludeLabel = namedtuple("ExcludeLabel", ["key", "value"])
2018-03-20 22:30:38 +00:00
2015-06-30 22:33:43 +00:00
2020-03-05 22:51:21 +00:00
class GarbageCollector:
    """Hold the shared configuration and logging handles for garbage collection."""

    def __init__(self):
        # Project-level wrappers imported from dockertidy.Config / dockertidy.Logger.
        self.config = SingleConfig()
        self.log = SingleLog()
        # NOTE(review): SingleLog() is instantiated a second time here; presumably
        # it is a singleton so both attributes share one logger -- confirm.
        self.logger = SingleLog().logger
def cleanup_containers(client, max_container_age, dry_run, exclude_container_labels):
    """Remove containers that are eligible for garbage collection.

    Every container reported by the daemon is considered, except those matched
    by ``exclude_container_labels``. Each remaining container is inspected and
    removed (together with its volumes, ``v=True``) when
    ``should_remove_container`` accepts it.

    Args:
        client: Docker API client providing ``inspect_container`` and
            ``remove_container``.
        max_container_age: Cutoff handed to ``should_remove_container`` as its
            ``min_date`` argument.
        dry_run: When truthy, only log what would be removed.
        exclude_container_labels: Label rules passed to
            ``filter_excluded_containers``.
    """
    # No module-level ``log`` is visible in this file, so resolve the logger here.
    log = logging.getLogger(__name__)

    candidates = filter_excluded_containers(
        get_all_containers(client),
        exclude_container_labels,
    )

    # Containers are processed in the reverse of the order the daemon listed them.
    for container_summary in reversed(list(candidates)):
        container = api_call(
            client.inspect_container,
            container=container_summary["Id"],
        )
        # api_call returns None when the inspect request failed.
        if not container or not should_remove_container(container, max_container_age):
            continue

        # "Ghost" containers are accepted without a FinishedAt check, so read
        # the state defensively instead of indexing into it.
        log.info(
            "Removing container %s %s %s",
            container["Id"][:16],
            container.get("Name", "").lstrip("/"),
            container.get("State", {}).get("FinishedAt"),
        )

        if not dry_run:
            api_call(
                client.remove_container,
                container=container["Id"],
                v=True,
            )
def filter_excluded_containers(containers, exclude_container_labels):
    """Drop containers matched by any of the exclude-label rules.

    When no rules are given the input is returned unchanged; otherwise a lazy
    ``filter`` iterator over the containers that are NOT excluded is returned.
    """
    if exclude_container_labels:
        return filter(
            lambda candidate: not should_exclude_container_with_labels(
                candidate,
                exclude_container_labels,
            ),
            containers,
        )
    return containers
def should_exclude_container_with_labels(container, exclude_container_labels):
    """Tell whether a container matches at least one exclude-label rule.

    Each rule has a ``key`` glob and an optional ``value`` glob. A rule with a
    falsy value matches when any label key matches the key glob; a rule with a
    value additionally requires one of those labels' values to match the value
    glob. Containers without labels never match.
    """
    labels = container["Labels"]
    if not labels:
        return False

    for rule in exclude_container_labels:
        matched_keys = fnmatch.filter(labels.keys(), rule.key)

        if not rule.value:
            # Key-only rule: any matching key is enough.
            if matched_keys:
                return True
            continue

        candidate_values = [labels[key] for key in matched_keys]
        if fnmatch.filter(candidate_values, rule.value):
            return True

    return False
2015-06-30 22:33:43 +00:00
def should_remove_container(container, min_date):
    """Decide whether an inspected container is old enough to be removed.

    Running containers are always kept and "Ghost" containers are always
    removed. Otherwise the relevant timestamp -- ``Created`` for a container
    whose ``FinishedAt`` equals ``YEAR_ZERO`` (created but never started),
    ``FinishedAt`` for all others -- must be earlier than ``min_date``.
    """
    state = container.get("State", {})

    if state.get("Running"):
        return False
    if state.get("Ghost"):
        return True

    never_started = state.get("FinishedAt") == YEAR_ZERO
    timestamp = container["Created"] if never_started else state["FinishedAt"]
    return dateutil.parser.parse(timestamp) < min_date
2015-06-30 22:33:43 +00:00
def get_all_containers(client):
    """Return all containers known to the daemon (``all=True``), logging the count.

    Args:
        client: Docker API client exposing ``containers(all=...)``.

    Returns:
        Whatever ``client.containers(all=True)`` returns.
    """
    # No module-level ``log`` is visible in this file, so resolve the logger here.
    log = logging.getLogger(__name__)

    log.info("Getting all containers")
    containers = client.containers(all=True)
    log.info("Found %s containers", len(containers))
    return containers
def get_all_images(client):
    """Return the images reported by ``client.images()``, logging the count.

    Args:
        client: Docker API client exposing ``images()``.

    Returns:
        Whatever ``client.images()`` returns.
    """
    # No module-level ``log`` is visible in this file, so resolve the logger here.
    log = logging.getLogger(__name__)

    log.info("Getting all images")
    images = client.images()
    log.info("Found %s images", len(images))
    return images
2016-12-01 01:00:06 +00:00
def get_dangling_volumes(client):
    """Return the volumes the daemon reports as dangling, logging the count.

    Args:
        client: Docker API client exposing ``volumes(filters)``.

    Returns:
        The ``"Volumes"`` entry of the response, or an empty list when that
        entry is falsy (e.g. ``None``).
    """
    # No module-level ``log`` is visible in this file, so resolve the logger here.
    log = logging.getLogger(__name__)

    log.info("Getting dangling volumes")
    volumes = client.volumes({"dangling": True})["Volumes"] or []
    log.info("Found %s dangling volumes", len(volumes))
    return volumes
def cleanup_images(client, max_image_age, dry_run, exclude_set):
    """Remove images that are unused by any container and not excluded.

    Images referenced by a container are skipped, as are images whose tags
    match a pattern in ``exclude_set``. The remaining images are handed to
    ``remove_image`` in reverse listing order.
    """
    # re-fetch container list so that we don't include removed containers
    containers = get_all_containers(client)
    candidates = get_all_images(client)

    older_than_1_21 = docker.utils.compare_version("1.21", client._version) < 0
    if older_than_1_21:
        tags_in_use = {entry["Image"] for entry in containers}
        candidates = filter_images_in_use(candidates, tags_in_use)
    else:
        # ImageID field was added in 1.21
        ids_in_use = {entry["ImageID"] for entry in containers}
        candidates = filter_images_in_use_by_id(candidates, ids_in_use)

    remaining = filter_excluded_images(candidates, exclude_set)
    for image_summary in reversed(list(remaining)):
        remove_image(client, image_summary, max_image_age, dry_run)
def filter_excluded_images(images, exclude_set):
    """Lazily drop images having a tag that matches any exclude pattern.

    Untagged images (as judged by ``no_image_tags``) are always kept.
    """

    def is_kept(image_summary):
        tags = image_summary.get("RepoTags")
        if no_image_tags(tags):
            return True
        # fnmatch.filter returns the (possibly empty) list of matching tags.
        return not any(fnmatch.filter(tags, pattern) for pattern in exclude_set)

    return filter(is_kept, images)
2015-06-30 22:33:43 +00:00
def filter_images_in_use(images, image_tags_in_use):
    """Lazily drop images whose tags intersect the set of tags in use.

    For an untagged image the comparison uses ``"<first 12 chars of Id>:latest"``
    in place of its tags.
    """

    def tags_of(image_summary):
        repo_tags = image_summary.get("RepoTags")
        if not no_image_tags(repo_tags):
            return set(repo_tags)
        # The repr of the image Id used by client.containers()
        return {"%s:latest" % image_summary["Id"][:12]}

    return filter(
        lambda image_summary: not (tags_of(image_summary) & image_tags_in_use),
        images,
    )
2015-06-30 22:33:43 +00:00
def filter_images_in_use_by_id(images, image_ids_in_use):
    """Lazily drop images whose ``Id`` appears in ``image_ids_in_use``."""
    return filter(
        lambda image_summary: image_summary["Id"] not in image_ids_in_use,
        images,
    )
2015-06-30 22:33:43 +00:00
def is_image_old(image, min_date):
    """Return True when the image's ``Created`` timestamp is before ``min_date``."""
    created_at = dateutil.parser.parse(image["Created"])
    return created_at < min_date
2015-06-30 22:33:43 +00:00
def no_image_tags(image_tags):
    """Return True when the tag list is empty/None or only the ``<none>:<none>`` placeholder."""
    if not image_tags:
        return True
    return image_tags == ["<none>:<none>"]
2015-06-30 22:33:43 +00:00
def remove_image(client, image_summary, min_date, dry_run):
    """Remove a single image if it is older than ``min_date``.

    The image is inspected first; nothing happens when the inspect call fails
    or the image is not old enough. Untagged images are removed by Id, tagged
    images are removed one tag at a time.

    Args:
        client: Docker API client providing ``inspect_image`` and
            ``remove_image``.
        image_summary: Image entry from the image listing (needs ``Id``; may
            carry ``RepoTags``).
        min_date: Cutoff passed to ``is_image_old``.
        dry_run: When truthy, only log what would be removed.
    """
    # No module-level ``log`` is visible in this file, so resolve the logger here.
    log = logging.getLogger(__name__)

    image = api_call(client.inspect_image, image=image_summary["Id"])
    # api_call returns None when the inspect request failed.
    if not image or not is_image_old(image, min_date):
        return

    log.info("Removing image %s", format_image(image, image_summary))
    if dry_run:
        return

    image_tags = image_summary.get("RepoTags")

    # If there are no tags, remove the id
    if no_image_tags(image_tags):
        api_call(client.remove_image, image=image_summary["Id"])
        return

    # Remove any repository tags so we don't hit 409 Conflict
    for image_tag in image_tags:
        api_call(client.remove_image, image=image_tag)
2015-06-30 22:33:43 +00:00
2016-12-01 01:00:06 +00:00
def remove_volume(client, volume, dry_run):
    """Remove one volume by name, or only log it when ``dry_run`` is truthy.

    Args:
        client: Docker API client providing ``remove_volume``.
        volume: Volume entry with a ``Name`` key; a falsy value is ignored.
        dry_run: When truthy, log the removal but do not perform it.
    """
    if not volume:
        return

    # No module-level ``log`` is visible in this file, so resolve the logger here.
    log = logging.getLogger(__name__)

    log.info("Removing volume %s", volume["Name"])
    if dry_run:
        return

    api_call(client.remove_volume, name=volume["Name"])
2016-12-01 01:00:06 +00:00
def cleanup_volumes(client, dry_run):
    """Remove every dangling volume, in reverse listing order.

    Args:
        client: Docker API client, forwarded to ``get_dangling_volumes`` and
            ``remove_volume``.
        dry_run: When truthy, removals are only logged.
    """
    # No module-level ``log`` is visible in this file, so resolve the logger here.
    log = logging.getLogger(__name__)

    dangling_volumes = get_dangling_volumes(client)
    for volume in reversed(dangling_volumes):
        log.info("Removing dangling volume %s", volume["Name"])
        remove_volume(client, volume, dry_run)
def api_call(func, **kwargs):
    """Call a Docker client function, logging instead of raising on API failures.

    Args:
        func: Callable to invoke.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        The result of ``func(**kwargs)``, or ``None`` when the call raised
        ``requests.exceptions.Timeout`` or ``docker.errors.APIError`` (a
        warning is logged in that case). Any other exception propagates.
    """
    try:
        return func(**kwargs)
    except (requests.exceptions.Timeout, docker.errors.APIError) as err:
        # No module-level ``log`` is visible in this file, so resolve the logger here.
        log = logging.getLogger(__name__)

        # Keep the two distinct message prefixes for timeouts and API errors.
        if isinstance(err, requests.exceptions.Timeout):
            prefix = "Failed to call"
        else:
            prefix = "Error calling"

        params = ",".join("%s=%s" % item for item in kwargs.items())
        # Not every callable (e.g. functools.partial) has a __name__.
        func_name = getattr(func, "__name__", repr(func))
        # Logger.warn is a deprecated alias; use Logger.warning.
        log.warning("%s %s %s %s", prefix, func_name, params, err)
    return None
2015-06-30 22:33:43 +00:00
def format_image(image, image_summary):
    """Render an image for log output as ``"<Id[:16]> <comma-separated tags>"``.

    The tag part is empty when the summary has no tags or only the
    ``<none>:<none>`` placeholder.
    """
    repo_tags = image_summary.get("RepoTags")
    if repo_tags and repo_tags != ["<none>:<none>"]:
        tag_text = ", ".join(repo_tags)
    else:
        tag_text = ""
    return "%s %s" % (image["Id"][:16], tag_text)
2015-06-30 22:33:43 +00:00
def build_exclude_set(image_tags, exclude_file):
    """Combine explicit image tags with patterns read from an exclude file.

    Lines of the file are stripped of surrounding whitespace; blank lines and
    lines starting with ``#`` are ignored. Either argument may be falsy.
    """
    patterns = set(image_tags) if image_tags else set()
    if not exclude_file:
        return patterns

    for raw_line in exclude_file.read().split("\n"):
        entry = raw_line.strip()
        if entry and not entry.startswith("#"):
            patterns.add(entry)
    return patterns
2018-03-20 22:30:38 +00:00
def format_exclude_labels(exclude_label_args):
    """Parse ``key`` / ``key=value`` strings into ``ExcludeLabel`` tuples.

    Each argument is split on the first ``=`` only, so values may themselves
    contain ``=``. Arguments without ``=`` get a value of ``None``.

    Args:
        exclude_label_args: Iterable of label strings; ``None`` is treated as
            "no labels" so an unset command-line option does not crash.

    Returns:
        A list of ``ExcludeLabel(key, value)`` in input order.
    """
    exclude_labels = []
    for exclude_label_arg in exclude_label_args or []:
        key, separator, value = exclude_label_arg.partition("=")
        exclude_labels.append(ExcludeLabel(
            key=key,
            # An absent "=" means a key-only rule; "key=" keeps the empty value.
            value=value if separator else None,
        ))
    return exclude_labels
2015-06-30 22:33:43 +00:00
def main():
    """Configure logging, build the Docker client and run the requested cleanups.

    Container, image and dangling-volume cleanup each run only when the
    corresponding option on the parsed arguments is truthy.
    """
    logging.basicConfig(stream=sys.stdout, format="%(message)s", level=logging.INFO)

    # NOTE(review): get_args is not defined in the visible part of this file -- confirm where it lives.
    args = get_args()
    client = docker.APIClient(version="auto", timeout=args.timeout, **kwargs_from_env())

    # Parsed unconditionally, before any cleanup option is inspected.
    exclude_container_labels = format_exclude_labels(args.exclude_container_label)

    if args.max_container_age:
        cleanup_containers(
            client,
            args.max_container_age,
            args.dry_run,
            exclude_container_labels,
        )

    if args.max_image_age:
        cleanup_images(
            client,
            args.max_image_age,
            args.dry_run,
            build_exclude_set(args.exclude_image, args.exclude_image_file),
        )

    if args.dangling_volumes:
        cleanup_volumes(client, args.dry_run)