ansible-later/env_27/lib/python2.7/site-packages/asn1crypto/_iri.py

# coding: utf-8

"""
Functions to convert unicode IRIs into ASCII byte string URIs and back. Exports
the following items:

 - iri_to_uri()
 - uri_to_iri()
"""

from __future__ import unicode_literals, division, absolute_import, print_function

from encodings import idna  # noqa
import codecs
import re
import sys

from ._errors import unwrap
from ._types import byte_cls, str_cls, type_name, bytes_to_list, int_types

if sys.version_info < (3,):
    from urlparse import urlsplit, urlunsplit
    from urllib import (
        quote as urlquote,
        unquote as unquote_to_bytes,
    )

else:
    from urllib.parse import (
        quote as urlquote,
        unquote_to_bytes,
        urlsplit,
        urlunsplit,
    )


def iri_to_uri(value):
    """
    Normalizes and encodes a unicode IRI into an ASCII byte string URI

    The host is IDNA-encoded, every other component is percent-encoded as
    UTF-8, and the default ports for http (80) and https (443) are dropped.

    :param value:
        A unicode string of an IRI

    :raises:
        TypeError - when value is not a unicode string

    :return:
        A byte string of the ASCII-encoded URI
    """

    if not isinstance(value, str_cls):
        raise TypeError(unwrap(
            '''
            value must be a unicode string, not %s
            ''',
            type_name(value)
        ))

    scheme = None
    # Python 2.6 doesn't split properly if the URL doesn't start with http:// or https://
    if sys.version_info < (2, 7) and not value.startswith('http://') and not value.startswith('https://'):
        real_prefix = None
        prefix_match = re.match('^[^:]*://', value)
        if prefix_match:
            # Temporarily swap in a scheme the old parser understands
            real_prefix = prefix_match.group(0)
            value = 'http://' + value[len(real_prefix):]
        parsed = urlsplit(value)
        if real_prefix:
            # Drop the trailing "://" to recover the real scheme
            scheme = _urlquote(real_prefix[:-3])
    else:
        parsed = urlsplit(value)

    if scheme is None:
        scheme = _urlquote(parsed.scheme)
    hostname = parsed.hostname
    if hostname is not None:
        # urlsplit() strips the square brackets from an IP literal such as
        # [::1]. They must be restored, otherwise the colons of the address
        # can not be told apart from the port separator.
        is_ip_literal = ':' in hostname
        hostname = hostname.encode('idna')
        if is_ip_literal:
            hostname = b'[' + hostname + b']'
    # RFC 3986 allows userinfo to contain sub-delims
    username = _urlquote(parsed.username, safe='!$&\'()*+,;=')
    password = _urlquote(parsed.password, safe='!$&\'()*+,;=')
    port = parsed.port
    if port is not None:
        port = str_cls(port).encode('ascii')

    netloc = b''
    if username is not None:
        netloc += username
        if password:
            netloc += b':' + password
        netloc += b'@'
    if hostname is not None:
        netloc += hostname
    if port is not None:
        # The default port of the scheme is redundant, so it is left out
        default_http = scheme == b'http' and port == b'80'
        default_https = scheme == b'https' and port == b'443'
        if not default_http and not default_https:
            netloc += b':' + port

    # RFC 3986 allows a path to contain sub-delims, plus "@" and ":"
    path = _urlquote(parsed.path, safe='/!$&\'()*+,;=@:')
    # RFC 3986 allows the query to contain sub-delims, plus "@", ":" , "/" and "?"
    query = _urlquote(parsed.query, safe='/?!$&\'()*+,;=@:')
    # RFC 3986 allows the fragment to contain sub-delims, plus "@", ":" , "/" and "?"
    fragment = _urlquote(parsed.fragment, safe='/?!$&\'()*+,;=@:')

    # A lone "/" path with nothing after it is normalized away
    if query is None and fragment is None and path == b'/':
        path = None

    # Python 2.7 compat
    if path is None:
        path = ''

    output = urlunsplit((scheme, netloc, path, query, fragment))
    if isinstance(output, str_cls):
        output = output.encode('latin1')
    return output


def uri_to_iri(value):
    """
    Converts an ASCII URI byte string into a unicode IRI

    :param value:
        An ASCII-encoded byte string of the URI

    :raises:
        TypeError - when value is not a byte string

    :return:
        A unicode string of the IRI
    """

    if not isinstance(value, byte_cls):
        raise TypeError(unwrap(
            '''
            value must be a byte string, not %s
            ''',
            type_name(value)
        ))

    parsed = urlsplit(value)

    scheme = parsed.scheme
    if scheme is not None:
        scheme = scheme.decode('ascii')

    # ":" and "@" delimit the userinfo, so decoded occurrences of them are
    # re-mapped to their %XX form
    username = _urlunquote(parsed.username, remap=[':', '@'])
    password = _urlunquote(parsed.password, remap=[':', '@'])
    hostname = parsed.hostname
    if hostname:
        hostname = hostname.decode('idna')
        # urlsplit() strips the square brackets from an IP literal such as
        # [::1]. They must be restored, otherwise the colons of the address
        # can not be told apart from the port separator.
        if ':' in hostname:
            hostname = '[' + hostname + ']'
    port = parsed.port
    if port and not isinstance(port, int_types):
        port = port.decode('ascii')

    netloc = ''
    if username is not None:
        netloc += username
        if password:
            netloc += ':' + password
        netloc += '@'
    if hostname is not None:
        netloc += hostname
    if port is not None:
        netloc += ':' + str_cls(port)

    # Literal delimiters of the path and query are preserved, while encoded
    # ones stay encoded so the structure of the component does not change
    path = _urlunquote(parsed.path, remap=['/'], preserve=True)
    query = _urlunquote(parsed.query, remap=['&', '='], preserve=True)
    fragment = _urlunquote(parsed.fragment)

    return urlunsplit((scheme, netloc, path, query, fragment))


def _iri_utf8_errors_handler(exc):
    """
    Codec error handler used when the UTF-8 portions of a URI are decoded
    into an IRI. Bytes that can not be decoded are kept in %XX form, as part
    of the resulting unicode string.

    :param exc:
        The UnicodeDecodeError exception

    :return:
        A 2-element tuple of (replacement unicode string, integer index to
        resume at)
    """

    undecodable = exc.object[exc.start:exc.end]
    escaped = ''.join('%%%02x' % byte for byte in bytes_to_list(undecodable))
    return (escaped, exc.end)


# Makes the handler available as the "iriutf8" errors mode of .decode()
codecs.register_error('iriutf8', _iri_utf8_errors_handler)


def _urlquote(string, safe=''):
    """
    Quotes a unicode string for use in a URL

    Existing %XX escapes are decoded when they form valid UTF-8, except for
    escaped versions of the safe characters, which are kept escaped. Escapes
    that remain are passed through unchanged instead of being double quoted.

    :param string:
        A unicode string

    :param safe:
        A unicode string of character to not encode

    :return:
        None (if string is None or empty) or an ASCII byte string of the
        quoted string
    """

    if string is None or string == '':
        return None

    # Anything already hex quoted is pulled out of the URL and unquoted if
    # possible
    escapes = []
    if re.search('%[0-9a-fA-F]{2}', string):
        # Try to unquote any percent values, restoring them if they are not
        # valid UTF-8. Also, requote any safe chars since encoded versions of
        # those are functionally different than the unquoted ones.
        def _try_unescape(match):
            byte_string = unquote_to_bytes(match.group(0))
            unicode_string = byte_string.decode('utf-8', 'iriutf8')
            for safe_char in list(safe):
                unicode_string = unicode_string.replace(safe_char, '%%%02x' % ord(safe_char))
            return unicode_string
        string = re.sub('(?:%[0-9a-fA-F]{2})+', _try_unescape, string)

        # NUL is used below as the placeholder for extracted escapes. Any NUL
        # already in the string (literal, or decoded from %00) would quote to
        # the same %00 and throw off the restore step, so it is turned into
        # an escape of its own first.
        string = string.replace('\x00', '%00')

        # Once we have the minimal set of hex quoted values, remove them from
        # the string so that they are not double quoted
        def _extract_escape(match):
            escapes.append(match.group(0).encode('ascii'))
            return '\x00'
        string = re.sub('%[0-9a-fA-F]{2}', _extract_escape, string)

    output = urlquote(string.encode('utf-8'), safe=safe.encode('utf-8'))
    if not isinstance(output, byte_cls):
        output = output.encode('ascii')

    # Restore the existing quoted values that we extracted, in order. Every
    # %00 in the output is a placeholder at this point.
    if escapes:
        pending = iter(escapes)

        def _return_escape(_):
            return next(pending)
        output = re.sub(b'%00', _return_escape, output)

    return output


def _urlunquote(byte_string, remap=None, preserve=None):
    """
    Unquotes a URI portion from a byte string into unicode using UTF-8

    :param byte_string:
        A byte string of the data to unquote

    :param remap:
        A list of characters (as unicode) that should be re-mapped to a
        %XX encoding. This is used when characters are not valid in part of a
        URL.

    :param preserve:
        A bool - indicates that the chars to be remapped if they occur in
        non-hex form, should be preserved. E.g. / for URL path.

    :return:
        None (if byte_string is None) or a unicode string
    """

    if byte_string is None:
        return byte_string

    if byte_string == b'':
        return ''

    # Pairs of (raw byte, lowercase %XX form) for every char to re-map
    remap_pairs = [
        (char.encode('ascii'), ('%%%02x' % ord(char)).encode('ascii'))
        for char in (remap or [])
    ]

    def _unquote_segment(segment):
        segment = unquote_to_bytes(segment)
        for raw, quoted in remap_pairs:
            segment = segment.replace(raw, quoted)
        return segment.decode('utf-8', 'iriutf8')

    if not (preserve and remap_pairs):
        return _unquote_segment(byte_string)

    # Split on the literal delimiters and unquote the pieces in between on
    # their own. Substituting placeholder characters for the delimiters
    # instead would let an encoded placeholder in the input (e.g. %1A) be
    # turned into a delimiter.
    delimiter_re = b'(' + b'|'.join(re.escape(raw) for raw, _ in remap_pairs) + b')'
    pieces = re.split(delimiter_re, byte_string)

    # Because of the capture group, the odd indexes hold the delimiters
    return ''.join(
        piece.decode('ascii') if index % 2 else _unquote_segment(piece)
        for index, piece in enumerate(pieces)
    )
add missing dependencies and fix drone pipeline 2019-04-11 11:00:36 +00:00			`# coding: utf-8`

			`"""`
			`Functions to convert unicode IRIs into ASCII byte string URIs and back. Exports`
			`the following items:`

			`- iri_to_uri()`
			`- uri_to_iri()`
			`"""`

			`from __future__ import unicode_literals, division, absolute_import, print_function`

			`from encodings import idna # noqa`
			`import codecs`
			`import re`
			`import sys`

			`from ._errors import unwrap`
			`from ._types import byte_cls, str_cls, type_name, bytes_to_list, int_types`

			`if sys.version_info < (3,):`
			`from urlparse import urlsplit, urlunsplit`
			`from urllib import (`
			`quote as urlquote,`
			`unquote as unquote_to_bytes,`
			`)`

			`else:`
			`from urllib.parse import (`
			`quote as urlquote,`
			`unquote_to_bytes,`
			`urlsplit,`
			`urlunsplit,`
			`)`


			`def iri_to_uri(value):`
			`"""`
			`Normalizes and encodes a unicode IRI into an ASCII byte string URI`

			`:param value:`
			`A unicode string of an IRI`

			`:return:`
			`A byte string of the ASCII-encoded URI`
			`"""`

			`if not isinstance(value, str_cls):`
			`raise TypeError(unwrap(`
			`'''`
			`value must be a unicode string, not %s`
			`''',`
			`type_name(value)`
			`))`

			`scheme = None`
			`# Python 2.6 doesn't split properly is the URL doesn't start with http:// or https://`
			`if sys.version_info < (2, 7) and not value.startswith('http://') and not value.startswith('https://'):`
			`real_prefix = None`
			`prefix_match = re.match('^[^:]*://', value)`
			`if prefix_match:`
			`real_prefix = prefix_match.group(0)`
			`value = 'http://' + value[len(real_prefix):]`
			`parsed = urlsplit(value)`
			`if real_prefix:`
			`value = real_prefix + value[7:]`
			`scheme = _urlquote(real_prefix[:-3])`
			`else:`
			`parsed = urlsplit(value)`

			`if scheme is None:`
			`scheme = _urlquote(parsed.scheme)`
			`hostname = parsed.hostname`
			`if hostname is not None:`
			`hostname = hostname.encode('idna')`
			`# RFC 3986 allows userinfo to contain sub-delims`
			`username = _urlquote(parsed.username, safe='!$&\'()*+,;=')`
			`password = _urlquote(parsed.password, safe='!$&\'()*+,;=')`
			`port = parsed.port`
			`if port is not None:`
			`port = str_cls(port).encode('ascii')`

			`netloc = b''`
			`if username is not None:`
			`netloc += username`
			`if password:`
			`netloc += b':' + password`
			`netloc += b'@'`
			`if hostname is not None:`
			`netloc += hostname`
			`if port is not None:`
			`default_http = scheme == b'http' and port == b'80'`
			`default_https = scheme == b'https' and port == b'443'`
			`if not default_http and not default_https:`
			`netloc += b':' + port`

			`# RFC 3986 allows a path to contain sub-delims, plus "@" and ":"`
			`path = _urlquote(parsed.path, safe='/!$&\'()*+,;=@:')`
			`# RFC 3986 allows the query to contain sub-delims, plus "@", ":" , "/" and "?"`
			`query = _urlquote(parsed.query, safe='/?!$&\'()*+,;=@:')`
			`# RFC 3986 allows the fragment to contain sub-delims, plus "@", ":" , "/" and "?"`
			`fragment = _urlquote(parsed.fragment, safe='/?!$&\'()*+,;=@:')`

			`if query is None and fragment is None and path == b'/':`
			`path = None`

			`# Python 2.7 compat`
			`if path is None:`
			`path = ''`

			`output = urlunsplit((scheme, netloc, path, query, fragment))`
			`if isinstance(output, str_cls):`
			`output = output.encode('latin1')`
			`return output`


			`def uri_to_iri(value):`
			`"""`
			`Converts an ASCII URI byte string into a unicode IRI`

			`:param value:`
			`An ASCII-encoded byte string of the URI`

			`:return:`
			`A unicode string of the IRI`
			`"""`

			`if not isinstance(value, byte_cls):`
			`raise TypeError(unwrap(`
			`'''`
			`value must be a byte string, not %s`
			`''',`
			`type_name(value)`
			`))`

			`parsed = urlsplit(value)`

			`scheme = parsed.scheme`
			`if scheme is not None:`
			`scheme = scheme.decode('ascii')`

			`username = _urlunquote(parsed.username, remap=[':', '@'])`
			`password = _urlunquote(parsed.password, remap=[':', '@'])`
			`hostname = parsed.hostname`
			`if hostname:`
			`hostname = hostname.decode('idna')`
			`port = parsed.port`
			`if port and not isinstance(port, int_types):`
			`port = port.decode('ascii')`

			`netloc = ''`
			`if username is not None:`
			`netloc += username`
			`if password:`
			`netloc += ':' + password`
			`netloc += '@'`
			`if hostname is not None:`
			`netloc += hostname`
			`if port is not None:`
			`netloc += ':' + str_cls(port)`

			`path = _urlunquote(parsed.path, remap=['/'], preserve=True)`
			`query = _urlunquote(parsed.query, remap=['&', '='], preserve=True)`
			`fragment = _urlunquote(parsed.fragment)`

			`return urlunsplit((scheme, netloc, path, query, fragment))`


			`def _iri_utf8_errors_handler(exc):`
			`"""`
			`Error handler for decoding UTF-8 parts of a URI into an IRI. Leaves byte`
			`sequences encoded in %XX format, but as part of a unicode string.`

			`:param exc:`
			`The UnicodeDecodeError exception`

			`:return:`
			`A 2-element tuple of (replacement unicode string, integer index to`
			`resume at)`
			`"""`

			`bytes_as_ints = bytes_to_list(exc.object[exc.start:exc.end])`
			`replacements = ['%%%02x' % num for num in bytes_as_ints]`
			`return (''.join(replacements), exc.end)`


			`codecs.register_error('iriutf8', _iri_utf8_errors_handler)`


			`def _urlquote(string, safe=''):`
			`"""`
			`Quotes a unicode string for use in a URL`

			`:param string:`
			`A unicode string`

			`:param safe:`
			`A unicode string of character to not encode`

			`:return:`
			`None (if string is None) or an ASCII byte string of the quoted string`
			`"""`

			`if string is None or string == '':`
			`return None`

			`# Anything already hex quoted is pulled out of the URL and unquoted if`
			`# possible`
			`escapes = []`
			`if re.search('%[0-9a-fA-F]{2}', string):`
			`# Try to unquote any percent values, restoring them if they are not`
			`# valid UTF-8. Also, requote any safe chars since encoded versions of`
			`# those are functionally different than the unquoted ones.`
			`def _try_unescape(match):`
			`byte_string = unquote_to_bytes(match.group(0))`
			`unicode_string = byte_string.decode('utf-8', 'iriutf8')`
			`for safe_char in list(safe):`
			`unicode_string = unicode_string.replace(safe_char, '%%%02x' % ord(safe_char))`
			`return unicode_string`
			`string = re.sub('(?:%[0-9a-fA-F]{2})+', _try_unescape, string)`

			`# Once we have the minimal set of hex quoted values, removed them from`
			`# the string so that they are not double quoted`
			`def _extract_escape(match):`
			`escapes.append(match.group(0).encode('ascii'))`
			`return '\x00'`
			`string = re.sub('%[0-9a-fA-F]{2}', _extract_escape, string)`

			`output = urlquote(string.encode('utf-8'), safe=safe.encode('utf-8'))`
			`if not isinstance(output, byte_cls):`
			`output = output.encode('ascii')`

			`# Restore the existing quoted values that we extracted`
			`if len(escapes) > 0:`
			`def _return_escape(_):`
			`return escapes.pop(0)`
			`output = re.sub(b'%00', _return_escape, output)`

			`return output`


			`def _urlunquote(byte_string, remap=None, preserve=None):`
			`"""`
			`Unquotes a URI portion from a byte string into unicode using UTF-8`

			`:param byte_string:`
			`A byte string of the data to unquote`

			`:param remap:`
			`A list of characters (as unicode) that should be re-mapped to a`
			`%XX encoding. This is used when characters are not valid in part of a`
			`URL.`

			`:param preserve:`
			`A bool - indicates that the chars to be remapped if they occur in`
			`non-hex form, should be preserved. E.g. / for URL path.`

			`:return:`
			`A unicode string`
			`"""`

			`if byte_string is None:`
			`return byte_string`

			`if byte_string == b'':`
			`return ''`

			`if preserve:`
			`replacements = ['\x1A', '\x1C', '\x1D', '\x1E', '\x1F']`
			`preserve_unmap = {}`
			`for char in remap:`
			`replacement = replacements.pop(0)`
			`preserve_unmap[replacement] = char`
			`byte_string = byte_string.replace(char.encode('ascii'), replacement.encode('ascii'))`

			`byte_string = unquote_to_bytes(byte_string)`

			`if remap:`
			`for char in remap:`
			`byte_string = byte_string.replace(char.encode('ascii'), ('%%%02x' % ord(char)).encode('ascii'))`

			`output = byte_string.decode('utf-8', 'iriutf8')`

			`if preserve:`
			`for replacement, original in preserve_unmap.items():`
			`output = output.replace(replacement, original)`

			`return output`