ansible-later/env_27/lib/python2.7/site-packages/ansible/modules/cloud/amazon/data_pipeline.py
2019-04-11 13:00:36 +02:00

603 lines
20 KiB
Python

#!/usr/bin/python
#
# Copyright (c) 2017 Ansible Project
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
from __future__ import absolute_import, division, print_function
__metaclass__ = type
ANSIBLE_METADATA = {'metadata_version': '1.1',
'status': ['preview'],
'supported_by': 'community'}
DOCUMENTATION = '''
---
module: data_pipeline
version_added: "2.4"
author:
- Raghu Udiyar <raghusiddarth@gmail.com> (@raags)
- Sloane Hertel <shertel@redhat.com>
requirements: [ "boto3" ]
short_description: Create and manage AWS Datapipelines
extends_documentation_fragment:
- aws
- ec2
description:
- Create and manage AWS Datapipelines. Creation is not idempotent in AWS, so the I(uniqueId) is created by hashing the options (minus objects)
given to the datapipeline.
- The pipeline definition must be in the format given here
U(http://docs.aws.amazon.com/datapipeline/latest/APIReference/API_PutPipelineDefinition.html#API_PutPipelineDefinition_RequestSyntax).
- Also operations will wait for a configurable amount of time to ensure the pipeline is in the requested state.
options:
name:
description:
- The name of the Datapipeline to create/modify/delete.
required: true
description:
description:
- An optional description for the pipeline being created.
default: ''
objects:
description:
- A list of pipeline object definitions, each of which is a dict that takes the keys C(id), C(name) and C(fields).
suboptions:
id:
description:
- The ID of the object.
name:
description:
- The name of the object.
fields:
description:
- A list of dicts that take the keys C(key) and C(stringValue)/C(refValue).
The value is specified as a reference to another object C(refValue) or as a string value C(stringValue)
but not as both.
parameters:
description:
- A list of parameter objects (dicts) in the pipeline definition.
suboptions:
id:
description:
- The ID of the parameter object.
attributes:
description:
- A list of attributes (dicts) of the parameter object. Each attribute takes the keys C(key) and C(stringValue) both
of which are strings.
values:
description:
- A list of parameter values (dicts) in the pipeline definition. Each dict takes the keys C(id) and C(stringValue) both
of which are strings.
timeout:
description:
- Time in seconds to wait for the pipeline to transition to the requested state, fail otherwise.
default: 300
state:
description:
- The requested state of the pipeline.
choices: ['present', 'absent', 'active', 'inactive']
default: present
tags:
description:
- A dict of key:value pair(s) to add to the pipeline.
'''
EXAMPLES = '''
# Note: These examples do not set authentication details, see the AWS Guide for details.
# Create pipeline
- data_pipeline:
name: test-dp
region: us-west-2
objects: "{{pipelineObjects}}"
parameters: "{{pipelineParameters}}"
values: "{{pipelineValues}}"
tags:
key1: val1
key2: val2
state: present
# Example populating and activating a pipeline that demonstrates two ways of providing pipeline objects
- data_pipeline:
name: test-dp
objects:
- "id": "DefaultSchedule"
"name": "Every 1 day"
"fields":
- "key": "period"
"stringValue": "1 days"
- "key": "type"
"stringValue": "Schedule"
- "key": "startAt"
"stringValue": "FIRST_ACTIVATION_DATE_TIME"
- "id": "Default"
"name": "Default"
"fields": [ { "key": "resourceRole", "stringValue": "my_resource_role" },
{ "key": "role", "stringValue": "DataPipelineDefaultRole" },
{ "key": "pipelineLogUri", "stringValue": "s3://my_s3_log.txt" },
{ "key": "scheduleType", "stringValue": "cron" },
{ "key": "schedule", "refValue": "DefaultSchedule" },
{ "key": "failureAndRerunMode", "stringValue": "CASCADE" } ]
state: active
# Activate pipeline
- data_pipeline:
name: test-dp
region: us-west-2
state: active
# Delete pipeline
- data_pipeline:
name: test-dp
region: us-west-2
state: absent
'''
RETURN = '''
changed:
description: whether the data pipeline has been modified
type: bool
returned: always
sample:
changed: true
result:
description:
- Contains the data pipeline data (data_pipeline) and a return message (msg).
If the data pipeline exists data_pipeline will contain the keys description, name,
pipeline_id, state, tags, and unique_id. If the data pipeline does not exist then
data_pipeline will be an empty dict. The msg describes the status of the operation.
returned: always
type: dict
'''
import hashlib
import json
import time
import traceback
try:
import boto3
from botocore.exceptions import ClientError
HAS_BOTO3 = True
except ImportError:
HAS_BOTO3 = False
from ansible.module_utils.basic import AnsibleModule
from ansible.module_utils.ec2 import ec2_argument_spec, get_aws_connection_info, boto3_conn, camel_dict_to_snake_dict
from ansible.module_utils._text import to_text
DP_ACTIVE_STATES = ['ACTIVE', 'SCHEDULED']
DP_INACTIVE_STATES = ['INACTIVE', 'PENDING', 'FINISHED', 'DELETING']
DP_ACTIVATING_STATE = 'ACTIVATING'
DP_DEACTIVATING_STATE = 'DEACTIVATING'
PIPELINE_DOESNT_EXIST = '^.*Pipeline with id: {0} does not exist$'
class DataPipelineNotFound(Exception):
pass
class TimeOutException(Exception):
pass
def pipeline_id(client, name):
"""Return pipeline id for the given pipeline name
:param object client: boto3 datapipeline client
:param string name: pipeline name
:returns: pipeline id
:raises: DataPipelineNotFound
"""
pipelines = client.list_pipelines()
for dp in pipelines['pipelineIdList']:
if dp['name'] == name:
return dp['id']
raise DataPipelineNotFound
def pipeline_description(client, dp_id):
"""Return pipeline description list
:param object client: boto3 datapipeline client
:returns: pipeline description dictionary
:raises: DataPipelineNotFound
"""
try:
return client.describe_pipelines(pipelineIds=[dp_id])
except ClientError as e:
raise DataPipelineNotFound
def pipeline_field(client, dp_id, field):
"""Return a pipeline field from the pipeline description.
The available fields are listed in describe_pipelines output.
:param object client: boto3 datapipeline client
:param string dp_id: pipeline id
:param string field: pipeline description field
:returns: pipeline field information
"""
dp_description = pipeline_description(client, dp_id)
for field_key in dp_description['pipelineDescriptionList'][0]['fields']:
if field_key['key'] == field:
return field_key['stringValue']
raise KeyError("Field key {0} not found!".format(field))
def run_with_timeout(timeout, func, *func_args, **func_kwargs):
"""Run func with the provided args and kwargs, and wait utill
timeout for truthy return value
:param int timeout: time to wait for status
:param function func: function to run, should return True or False
:param args func_args: function args to pass to func
:param kwargs func_kwargs: function key word args
:returns: True if func returns truthy within timeout
:raises: TimeOutException
"""
for _ in range(timeout // 10):
if func(*func_args, **func_kwargs):
return True
else:
# check every 10s
time.sleep(10)
raise TimeOutException
def check_dp_exists(client, dp_id):
"""Check if datapipeline exists
:param object client: boto3 datapipeline client
:param string dp_id: pipeline id
:returns: True or False
"""
try:
# pipeline_description raises DataPipelineNotFound
if pipeline_description(client, dp_id):
return True
else:
return False
except DataPipelineNotFound:
return False
def check_dp_status(client, dp_id, status):
"""Checks if datapipeline matches states in status list
:param object client: boto3 datapipeline client
:param string dp_id: pipeline id
:param list status: list of states to check against
:returns: True or False
"""
if not isinstance(status, list):
raise AssertionError()
if pipeline_field(client, dp_id, field="@pipelineState") in status:
return True
else:
return False
def pipeline_status_timeout(client, dp_id, status, timeout):
args = (client, dp_id, status)
return run_with_timeout(timeout, check_dp_status, *args)
def pipeline_exists_timeout(client, dp_id, timeout):
args = (client, dp_id)
return run_with_timeout(timeout, check_dp_exists, *args)
def activate_pipeline(client, module):
"""Activates pipeline
"""
dp_name = module.params.get('name')
timeout = module.params.get('timeout')
try:
dp_id = pipeline_id(client, dp_name)
except DataPipelineNotFound:
module.fail_json(msg='Data Pipeline {0} not found'.format(dp_name))
if pipeline_field(client, dp_id, field="@pipelineState") in DP_ACTIVE_STATES:
changed = False
else:
try:
client.activate_pipeline(pipelineId=dp_id)
except ClientError as e:
if e.response["Error"]["Code"] == "InvalidRequestException":
module.fail_json(msg="You need to populate your pipeline before activation.")
try:
pipeline_status_timeout(client, dp_id, status=DP_ACTIVE_STATES,
timeout=timeout)
except TimeOutException:
if pipeline_field(client, dp_id, field="@pipelineState") == "FINISHED":
# activated but completed more rapidly than it was checked
pass
else:
module.fail_json(msg=('Data Pipeline {0} failed to activate '
'within timeout {1} seconds').format(dp_name, timeout))
changed = True
data_pipeline = get_result(client, dp_id)
result = {'data_pipeline': data_pipeline,
'msg': 'Data Pipeline {0} activated.'.format(dp_name)}
return (changed, result)
def deactivate_pipeline(client, module):
"""Deactivates pipeline
"""
dp_name = module.params.get('name')
timeout = module.params.get('timeout')
try:
dp_id = pipeline_id(client, dp_name)
except DataPipelineNotFound:
module.fail_json(msg='Data Pipeline {0} not found'.format(dp_name))
if pipeline_field(client, dp_id, field="@pipelineState") in DP_INACTIVE_STATES:
changed = False
else:
client.deactivate_pipeline(pipelineId=dp_id)
try:
pipeline_status_timeout(client, dp_id, status=DP_INACTIVE_STATES,
timeout=timeout)
except TimeOutException:
module.fail_json(msg=('Data Pipeline {0} failed to deactivate'
'within timeout {1} seconds').format(dp_name, timeout))
changed = True
data_pipeline = get_result(client, dp_id)
result = {'data_pipeline': data_pipeline,
'msg': 'Data Pipeline {0} deactivated.'.format(dp_name)}
return (changed, result)
def _delete_dp_with_check(dp_id, client, timeout):
client.delete_pipeline(pipelineId=dp_id)
try:
pipeline_status_timeout(client=client, dp_id=dp_id, status=[PIPELINE_DOESNT_EXIST], timeout=timeout)
except DataPipelineNotFound:
return True
def delete_pipeline(client, module):
"""Deletes pipeline
"""
dp_name = module.params.get('name')
timeout = module.params.get('timeout')
try:
dp_id = pipeline_id(client, dp_name)
_delete_dp_with_check(dp_id, client, timeout)
changed = True
except DataPipelineNotFound:
changed = False
except TimeOutException:
module.fail_json(msg=('Data Pipeline {0} failed to delete'
'within timeout {1} seconds').format(dp_name, timeout))
result = {'data_pipeline': {},
'msg': 'Data Pipeline {0} deleted'.format(dp_name)}
return (changed, result)
def build_unique_id(module):
data = dict(module.params)
# removing objects from the unique id so we can update objects or populate the pipeline after creation without needing to make a new pipeline
[data.pop(each, None) for each in ('objects', 'timeout')]
json_data = json.dumps(data, sort_keys=True).encode("utf-8")
hashed_data = hashlib.md5(json_data).hexdigest()
return hashed_data
def format_tags(tags):
""" Reformats tags
:param dict tags: dict of data pipeline tags (e.g. {key1: val1, key2: val2, key3: val3})
:returns: list of dicts (e.g. [{key: key1, value: val1}, {key: key2, value: val2}, {key: key3, value: val3}])
"""
return [dict(key=k, value=v) for k, v in tags.items()]
def get_result(client, dp_id):
""" Get the current state of the data pipeline and reformat it to snake_case for exit_json
:param object client: boto3 datapipeline client
:param string dp_id: pipeline id
:returns: reformatted dict of pipeline description
"""
# pipeline_description returns a pipelineDescriptionList of length 1
# dp is a dict with keys "description" (str), "fields" (list), "name" (str), "pipelineId" (str), "tags" (dict)
dp = pipeline_description(client, dp_id)['pipelineDescriptionList'][0]
# Get uniqueId and pipelineState in fields to add to the exit_json result
dp["unique_id"] = pipeline_field(client, dp_id, field="uniqueId")
dp["pipeline_state"] = pipeline_field(client, dp_id, field="@pipelineState")
# Remove fields; can't make a list snake_case and most of the data is redundant
del dp["fields"]
# Note: tags is already formatted fine so we don't need to do anything with it
# Reformat data pipeline and add reformatted fields back
dp = camel_dict_to_snake_dict(dp)
return dp
def diff_pipeline(client, module, objects, unique_id, dp_name):
"""Check if there's another pipeline with the same unique_id and if so, checks if the object needs to be updated
"""
result = {}
changed = False
create_dp = False
# See if there is already a pipeline with the same unique_id
unique_id = build_unique_id(module)
try:
dp_id = pipeline_id(client, dp_name)
dp_unique_id = to_text(pipeline_field(client, dp_id, field="uniqueId"))
if dp_unique_id != unique_id:
# A change is expected but not determined. Updated to a bool in create_pipeline().
changed = "NEW_VERSION"
create_dp = True
# Unique ids are the same - check if pipeline needs modification
else:
dp_objects = client.get_pipeline_definition(pipelineId=dp_id)['pipelineObjects']
# Definition needs to be updated
if dp_objects != objects:
changed, msg = define_pipeline(client, module, objects, dp_id)
# No changes
else:
msg = 'Data Pipeline {0} is present'.format(dp_name)
data_pipeline = get_result(client, dp_id)
result = {'data_pipeline': data_pipeline,
'msg': msg}
except DataPipelineNotFound:
create_dp = True
return create_dp, changed, result
def define_pipeline(client, module, objects, dp_id):
"""Puts pipeline definition
"""
dp_name = module.params.get('name')
if pipeline_field(client, dp_id, field="@pipelineState") == "FINISHED":
msg = 'Data Pipeline {0} is unable to be updated while in state FINISHED.'.format(dp_name)
changed = False
elif objects:
parameters = module.params.get('parameters')
values = module.params.get('values')
try:
client.put_pipeline_definition(pipelineId=dp_id,
pipelineObjects=objects,
parameterObjects=parameters,
parameterValues=values)
msg = 'Data Pipeline {0} has been updated.'.format(dp_name)
changed = True
except ClientError as e:
module.fail_json(msg="Failed to put the definition for pipeline {0}. Check that string/reference fields"
"are not empty and that the number of objects in the pipeline does not exceed maximum allowed"
"objects".format(dp_name), exception=traceback.format_exc())
else:
changed = False
msg = ""
return changed, msg
def create_pipeline(client, module):
"""Creates datapipeline. Uses uniqueId to achieve idempotency.
"""
dp_name = module.params.get('name')
objects = module.params.get('objects', None)
description = module.params.get('description', '')
tags = module.params.get('tags')
timeout = module.params.get('timeout')
unique_id = build_unique_id(module)
create_dp, changed, result = diff_pipeline(client, module, objects, unique_id, dp_name)
if changed == "NEW_VERSION":
# delete old version
changed, _ = delete_pipeline(client, module)
# There isn't a pipeline or it has different parameters than the pipeline in existence.
if create_dp:
# Make pipeline
try:
tags = format_tags(tags)
dp = client.create_pipeline(name=dp_name,
uniqueId=unique_id,
description=description,
tags=tags)
dp_id = dp['pipelineId']
pipeline_exists_timeout(client, dp_id, timeout)
except ClientError as e:
module.fail_json(msg="Failed to create the data pipeline {0}.".format(dp_name), exception=traceback.format_exc())
except TimeOutException:
module.fail_json(msg=('Data Pipeline {0} failed to create'
'within timeout {1} seconds').format(dp_name, timeout))
# Put pipeline definition
_, msg = define_pipeline(client, module, objects, dp_id)
changed = True
data_pipeline = get_result(client, dp_id)
result = {'data_pipeline': data_pipeline,
'msg': 'Data Pipeline {0} created.'.format(dp_name) + msg}
return (changed, result)
def main():
argument_spec = ec2_argument_spec()
argument_spec.update(
dict(
name=dict(required=True),
version=dict(required=False),
description=dict(required=False, default=''),
objects=dict(required=False, type='list', default=[]),
parameters=dict(required=False, type='list', default=[]),
timeout=dict(required=False, type='int', default=300),
state=dict(default='present', choices=['present', 'absent',
'active', 'inactive']),
tags=dict(required=False, type='dict', default={}),
values=dict(required=False, type='list', default=[])
)
)
module = AnsibleModule(argument_spec, supports_check_mode=False)
if not HAS_BOTO3:
module.fail_json(msg='boto3 is required for the datapipeline module!')
try:
region, ec2_url, aws_connect_kwargs = get_aws_connection_info(module, boto3=True)
if not region:
module.fail_json(msg="Region must be specified as a parameter, in EC2_REGION or AWS_REGION environment variables or in boto configuration file")
client = boto3_conn(module, conn_type='client',
resource='datapipeline', region=region,
endpoint=ec2_url, **aws_connect_kwargs)
except ClientError as e:
module.fail_json(msg="Can't authorize connection - " + str(e))
state = module.params.get('state')
if state == 'present':
changed, result = create_pipeline(client, module)
elif state == 'absent':
changed, result = delete_pipeline(client, module)
elif state == 'active':
changed, result = activate_pipeline(client, module)
elif state == 'inactive':
changed, result = deactivate_pipeline(client, module)
module.exit_json(result=result, changed=changed)
if __name__ == '__main__':
main()