mirror of
https://github.com/thegeeklab/ansible-later.git
synced 2024-11-18 02:40:40 +00:00
654 lines
24 KiB
Python
654 lines
24 KiB
Python
"""Python code parser."""
|
|
|
|
import six
|
|
import textwrap
|
|
import tokenize as tk
|
|
from itertools import chain, dropwhile
|
|
from re import compile as re
|
|
from .utils import log
|
|
|
|
try:
|
|
from StringIO import StringIO
|
|
except ImportError: # Python 3.0 and later
|
|
from io import StringIO
|
|
|
|
try:
    next
except NameError:  # Python 2.5 and earlier
    # Sentinel meaning "no default supplied", so that None can still be
    # passed as a real default value.
    nothing = object()

    def next(obj, default=nothing):
        """Stand-in for the builtin ``next`` on interpreters that lack it.

        Return ``obj.next()``. When a `default` is supplied and the iterator
        is exhausted, return `default` instead of raising StopIteration.
        """
        if default == nothing:
            return obj.next()
        try:
            return obj.next()
        except StopIteration:
            return default
|
|
|
|
|
|
# Names exported by ``from <this module> import *``.
__all__ = ('Parser', 'Definition', 'Module', 'Package', 'Function',
           'NestedFunction', 'Method', 'Class', 'NestedClass', 'AllError',
           'StringIO', 'ParseError')
|
|
|
|
|
|
class ParseError(Exception):
    """Raised when a source file cannot be parsed."""

    def __str__(self):
        """Return a fixed, human-readable description of the failure."""
        description = "Cannot parse file."
        return description
|
|
|
|
|
|
class UnexpectedTokenError(ParseError):
    """Raised when a token is not of the kind the parser expected."""

    def __init__(self, token, expected_kind):
        """Remember the offending `token` and the `expected_kind`."""
        self.expected_kind = expected_kind
        self.token = token

    def __str__(self):
        """Describe which token was found and which kind was expected."""
        template = "Unexpected token {}, expected {}"
        return template.format(self.token, self.expected_kind)
|
|
|
|
|
|
def humanize(string):
    """Turn a CamelCase `string` into lowercase, space-separated words.

    A space is inserted between any character and the run of uppercase
    letters that follows it; the result is then lowercased.
    """
    camel_boundary = re(r'(.)([A-Z]+)')
    spaced = camel_boundary.sub(r'\1 \2', string)
    return spaced.lower()
|
|
|
|
|
|
class Value(object):
    """A generic object with a list of preset fields.

    Subclasses declare a `_fields` sequence of attribute names; the
    positional constructor arguments are bound to those names in order.
    """

    def __init__(self, *args):
        """Bind each positional argument to the matching name in `_fields`.

        Raises:
            ValueError: if the number of arguments differs from the number
                of declared fields.
        """
        if len(self._fields) != len(args):
            raise ValueError('got {} arguments for {} fields for {}: {}'
                             .format(len(args), len(self._fields),
                                     self.__class__.__name__, self._fields))
        vars(self).update(zip(self._fields, args))

    def __hash__(self):
        """Hash the textual representation of the instance."""
        return hash(repr(self))

    def __eq__(self, other):
        """Return True iff `other` is a Value with equal instance attributes.

        Anything that is not a Value (None, numbers, strings, ...) yields
        NotImplemented, so ``value == 5`` evaluates to False instead of
        raising TypeError from ``vars(5)`` or leaking a non-boolean result.
        """
        if not isinstance(other, Value):
            return NotImplemented
        return vars(self) == vars(other)

    def __ne__(self, other):
        """Return the negation of `__eq__` (needed explicitly on Python 2)."""
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __repr__(self):
        """Return ``ClassName(field=value, ...)`` in `_fields` order."""
        kwargs = ', '.join('{}={!r}'.format(field, getattr(self, field))
                           for field in self._fields)
        return '{}({})'.format(self.__class__.__name__, kwargs)
|
|
|
|
|
|
class Definition(Value):
    """A Python source code definition (could be class, function, etc)."""

    _fields = ('name', '_source', 'start', 'end', 'decorators', 'docstring',
               'children', 'parent', 'skipped_error_codes')

    is_class = False

    @property
    def _human(self):
        """Return the class name split into lowercase words."""
        return humanize(type(self).__name__)

    @property
    def kind(self):
        """Return the last word of the humanized class name."""
        return self._human.split()[-1]

    @property
    def module(self):
        """Return the module this definition belongs to, via its parent."""
        return self.parent.module

    @property
    def dunder_all(self):
        """Return the `dunder_all` value of the owning module."""
        return self.module.dunder_all

    @property
    def _slice(self):
        """Return the slice of `_source` spanned by `start`..`end`."""
        return slice(self.start - 1, self.end)

    def __iter__(self):
        """Iterate over this definition, then over each child's iteration."""
        return chain([self], *self.children)

    @property
    def error_lineno(self):
        """Get the line number with which to report violations."""
        if isinstance(self.docstring, Docstring):
            return self.docstring.start
        return self.start

    @property
    def _publicity(self):
        """Return 'public' or 'private' according to `is_public`."""
        return {True: 'public', False: 'private'}[self.is_public]

    @property
    def source(self):
        """Return the source code for the definition.

        Trailing lines that are blank or contain only a comment are dropped.
        """
        def is_empty_or_comment(line):
            stripped = line.strip()
            return stripped == '' or stripped.startswith('#')

        lines = self._source[self._slice]
        # Walk backwards to discard the trailing blank/comment lines, then
        # restore the original order.
        kept = list(dropwhile(is_empty_or_comment, reversed(lines)))
        kept.reverse()
        return ''.join(kept)

    def __str__(self):
        """Describe the definition, e.g. for use in violation messages."""
        description = 'in {} {} `{}`'.format(
            self._publicity, self._human, self.name)
        if not self.skipped_error_codes:
            return description
        return description + ' (skipping {})'.format(self.skipped_error_codes)
|
|
|
|
|
|
class Module(Definition):
    """A Python source code module."""

    _fields = ('name', '_source', 'start', 'end', 'decorators', 'docstring',
               'children', 'parent', '_dunder_all', 'dunder_all_error',
               'future_imports', 'skipped_error_codes')

    @staticmethod
    def _nest(s):
        """Map the keyword 'def' or 'class' to the child definition type."""
        return {'def': Function, 'class': Class}[s]

    @property
    def module(self):
        """Return the module itself, ending the parent chain lookup."""
        return self

    @property
    def dunder_all(self):
        """Return the stored `_dunder_all` field."""
        return self._dunder_all

    @property
    def is_public(self):
        """Return True unless the name starts with exactly one underscore.

        Names starting with a double underscore count as public.
        """
        name = self.name
        return name.startswith('__') or not name.startswith('_')

    def __str__(self):
        """Return the fixed location description for module-level items."""
        return 'at module level'
|
|
|
|
|
|
class Package(Module):
    """A package is a __init__.py module.

    It adds no behaviour of its own; it only distinguishes modules whose
    filename ends with ``__init__.py`` from ordinary ones.
    """
|
|
|
|
|
|
class Function(Definition):
    """A Python source code function."""

    @staticmethod
    def _nest(s):
        """Map the keyword 'def' or 'class' to the nested definition type."""
        return {'def': NestedFunction, 'class': NestedClass}[s]

    @property
    def is_public(self):
        """Return True iff this function should be considered public.

        When the module has a usable ``__all__``, membership in it decides;
        otherwise a leading underscore marks the function as private.
        """
        exported = self.dunder_all
        if exported is None:
            return not self.name.startswith('_')
        return self.name in exported

    @property
    def is_test(self):
        """Return True if this function is a test function/method.

        We exclude tests from the imperative mood check, because to phrase
        their docstring in the imperative mood, they would have to start with
        a highly redundant "Test that ...".

        """
        name = self.name
        return name.startswith('test') or name == 'runTest'
|
|
|
|
|
|
class NestedFunction(Function):
    """A Python source code nested function."""

    # Nested functions are never public: this plain attribute replaces the
    # `is_public` property inherited from Function.
    is_public = False
|
|
|
|
|
|
class Method(Function):
    """A Python source code method."""

    @property
    def is_magic(self):
        """Return True iff this method is a magic method (e.g., `__str__`).

        Names listed in VARIADIC_MAGIC_METHODS are deliberately excluded.
        """
        name = self.name
        if not name.startswith('__'):
            return False
        if not name.endswith('__'):
            return False
        return name not in VARIADIC_MAGIC_METHODS

    @property
    def is_init(self):
        """Return True iff this method is `__init__`."""
        return '__init__' == self.name

    @property
    def is_public(self):
        """Return True iff this method should be considered public."""
        # A decorator such as `foo.setter` on method `foo` marks a property
        # setter/deleter, which is treated as private.
        # Given 'foo', match 'foo.bar' but not 'foobar' or 'sfoo'
        accessor_pattern = r"^{}\.".format(self.name)
        if any(re(accessor_pattern).match(decorator.name)
               for decorator in self.decorators):
            return False
        name_is_public = (not self.name.startswith('_') or
                          self.name in VARIADIC_MAGIC_METHODS or
                          self.is_magic)
        return self.parent.is_public and name_is_public
|
|
|
|
|
|
class Class(Definition):
    """A Python source code class."""

    # Children introduced by `def` are methods; by `class`, nested classes.
    _nest = staticmethod(lambda s: {'def': Method, 'class': NestedClass}[s])
    # Reuse Function's publicity rule (the very same property object).
    is_public = Function.is_public
    # Overrides Definition.is_class, which defaults to False.
    is_class = True
|
|
|
|
|
|
class NestedClass(Class):
    """A Python source code nested class."""

    @property
    def is_public(self):
        """Return True iff this class should be considered public.

        The name must not start with an underscore, and the parent must be
        a class that is itself public.
        """
        if self.name.startswith('_'):
            return False
        enclosing = self.parent
        return enclosing.is_class and enclosing.is_public
|
|
|
|
|
|
class Decorator(Value):
    """A decorator for function, method or class."""

    # Two positional fields, in this order: the decorator's name and its
    # arguments.
    _fields = 'name arguments'.split()
|
|
|
|
|
|
class Docstring(str):
    """Represent a docstring.

    This is a string, but has additional start/end attributes representing
    the start and end of the token.

    """

    def __new__(cls, v, start, end):
        """Create the string from `v`; `start` and `end` are set later."""
        return super(Docstring, cls).__new__(cls, v)

    def __init__(self, v, start, end):
        """Record where the docstring token starts and ends."""
        self.start, self.end = start, end
|
|
|
|
|
|
# Dunder methods that Method.is_magic does not report as magic, while
# Method.is_public still treats their names as public.
VARIADIC_MAGIC_METHODS = ('__init__', '__call__', '__new__')
|
|
|
|
|
|
class AllError(Exception):
    """Raised when there is a problem with __all__ when parsing."""

    def __init__(self, message):
        """Initialize the error with a more specific message.

        `message` is followed by a fixed explanation of why ``__all__``
        matters and what form it is expected to take.
        """
        # The explanatory text previously ended with a stray " ')" after the
        # URL; it has been removed so the message ends cleanly.
        Exception.__init__(
            self, message + textwrap.dedent("""
                That means pydocstyle cannot decide which definitions are
                public. Variable __all__ should be present at most once in
                each file, in form
                `__all__ = ('a_public_function', 'APublicClass', ...)`.
                More info on __all__: http://stackoverflow.com/q/44834/.
                """))
|
|
|
|
|
|
class TokenStream(object):
    """Iterate over the tokens of a file-like object, one token at a time.

    `current` is the token about to be consumed (None once the stream is
    exhausted), `line` is the line on which the latest token starts, and
    `got_logical_newline` tells whether the previously consumed token ended
    a logical line.
    """

    # A logical newline is where a new expression or statement begins. When
    # there is a physical new line, but not a logical one, for example:
    # (x +
    #  y)
    # The token will be tk.NL, not tk.NEWLINE.
    LOGICAL_NEWLINES = {tk.NEWLINE, tk.INDENT, tk.DEDENT}

    def __init__(self, filelike):
        """Start tokenizing `filelike` and load its first token."""
        self._generator = tk.generate_tokens(filelike.readline)
        self.current = Token(*next(self._generator, None))
        self.line = self.current.start[0]
        self.log = log
        self.got_logical_newline = True

    def move(self):
        """Advance to the next token and return the one just left behind."""
        previous = self.current
        raw = self._next_from_generator()
        if raw is None:
            # Exhausted (or tokenizing failed): `line` keeps its last value.
            self.current = None
        else:
            self.current = Token(*raw)
            self.line = self.current.start[0]
        self.got_logical_newline = previous.kind in self.LOGICAL_NEWLINES
        return previous

    def _next_from_generator(self):
        """Return the next raw token tuple, or None when none is available.

        Tokenizer errors are logged as warnings and reported as None.
        """
        try:
            raw = next(self._generator, None)
        except (SyntaxError, tk.TokenError):
            self.log.warning('error generating tokens', exc_info=True)
            raw = None
        return raw

    def __iter__(self):
        """Yield the current token repeatedly, advancing after each one."""
        while self.current is not None:
            yield self.current
            self.move()
|
|
|
|
|
|
class TokenKind(int):
    """An integer token type whose repr is its symbolic tokenize name."""

    def __repr__(self):
        """Return ``tk.<NAME>`` using the tokenize module's name table."""
        symbolic_name = tk.tok_name[self]
        return "tk.{}".format(symbolic_name)
|
|
|
|
|
|
class Token(Value):
    """A single token with kind, value, start, end and source fields."""

    _fields = ['kind', 'value', 'start', 'end', 'source']

    def __init__(self, *args):
        """Store the fields and wrap `kind` in a TokenKind."""
        super(Token, self).__init__(*args)
        wrapped_kind = TokenKind(self.kind)
        self.kind = wrapped_kind

    def __str__(self):
        """Return the token as ``<kind repr> (<value>)``."""
        template = "{!r} ({})"
        return template.format(self.kind, self.value)
|
|
|
|
|
|
class Parser(object):
    """A Python source code parser.

    The parser walks the token stream of a file and builds a tree of
    definition objects (module, classes, functions, ...) while collecting
    the module's ``__all__`` and its ``__future__`` imports.
    """

    def parse(self, filelike, filename):
        """Parse the given file-like object and return its Module object.

        Raises:
            ParseError: if the source does not compile.
        """
        self.log = log
        self.source = filelike.readlines()
        src = ''.join(self.source)
        try:
            # Compiling first rejects syntactically invalid files up front.
            compile(src, filename, 'exec')
        except SyntaxError as error:
            six.raise_from(ParseError(), error)
        self.stream = TokenStream(StringIO(src))
        self.filename = filename
        self.dunder_all = None
        self.dunder_all_error = None
        self.future_imports = set()
        self._accumulated_decorators = []
        return self.parse_module()

    # TODO: remove
    def __call__(self, *args, **kwargs):
        """Call the parse method."""
        return self.parse(*args, **kwargs)

    # Shortcuts to the stream's current token and current line number.
    current = property(lambda self: self.stream.current)
    line = property(lambda self: self.stream.line)

    def consume(self, kind):
        """Consume one token and verify it is of the expected kind.

        Raises:
            UnexpectedTokenError: if the consumed token has another kind.
        """
        next_token = self.stream.move()
        if next_token.kind != kind:
            raise UnexpectedTokenError(token=next_token, expected_kind=kind)

    def leapfrog(self, kind, value=None):
        """Skip tokens in the stream until a certain token kind is reached.

        If `value` is specified, tokens whose values are different will also
        be skipped. The matching token itself is consumed as well.
        """
        while self.current is not None:
            if (self.current.kind == kind and
                    (value is None or self.current.value == value)):
                self.consume(kind)
                return
            self.stream.move()

    def parse_docstring(self):
        """Parse a single docstring and return its value.

        Comments and newlines before the docstring are skipped. Return a
        Docstring, or None when the next token is not a string.
        """
        self.log.debug("parsing docstring, token is %s", self.current)
        while self.current.kind in (tk.COMMENT, tk.NEWLINE, tk.NL):
            self.stream.move()
            self.log.debug("parsing docstring, token is %r (%s)",
                           self.current.kind, self.current.value)
        if self.current.kind == tk.STRING:
            docstring = Docstring(
                self.current.value,
                self.current.start[0],
                self.current.end[0]
            )
            self.stream.move()
            return docstring
        return None

    def parse_decorators(self):
        """Called after first @ is found.

        Parse decorators into self._accumulated_decorators.
        Continue to do so until encountering the 'def' or 'class' start token.
        """
        name = []
        arguments = []
        at_arguments = False

        while self.current is not None:
            self.log.debug("parsing decorators, current token is %r (%s)",
                           self.current.kind, self.current.value)
            if (self.current.kind == tk.NAME and
                    self.current.value in ['def', 'class']):
                # Done with decorators - found function or class proper
                break
            elif self.current.kind == tk.OP and self.current.value == '@':
                # New decorator found. Store the decorator accumulated so far:
                self._accumulated_decorators.append(
                    Decorator(''.join(name), ''.join(arguments)))
                # Now reset to begin accumulating the new decorator:
                name = []
                arguments = []
                at_arguments = False
            elif self.current.kind == tk.OP and self.current.value == '(':
                at_arguments = True
            elif self.current.kind == tk.OP and self.current.value == ')':
                # Ignore close parenthesis
                pass
            elif self.current.kind in (tk.NEWLINE, tk.NL, tk.COMMENT):
                # Ignore newlines, and comments as well: a trailing comment
                # is not part of the decorator's name or arguments.
                pass
            else:
                # Keep accumulating current decorator's name or argument.
                if not at_arguments:
                    name.append(self.current.value)
                else:
                    arguments.append(self.current.value)
            self.stream.move()

        # Add decorator accumulated so far
        self._accumulated_decorators.append(
            Decorator(''.join(name), ''.join(arguments)))

    def parse_definitions(self, class_, dunder_all=False):
        """Parse multiple definitions and yield them.

        `class_` is the type of the enclosing definition; its `_nest` maps
        'def'/'class' to the type used for children. `__all__` is only
        examined when `dunder_all` is true, which is not propagated to the
        recursive call made for indented blocks.
        """
        while self.current is not None:
            self.log.debug("parsing definition list, current token is %r (%s)",
                           self.current.kind, self.current.value)
            self.log.debug('got_newline: %s', self.stream.got_logical_newline)
            if dunder_all and self.current.value == '__all__':
                self.parse_dunder_all()
            elif (self.current.kind == tk.OP and
                    self.current.value == '@' and
                    self.stream.got_logical_newline):
                self.consume(tk.OP)
                self.parse_decorators()
            elif self.current.value in ['def', 'class']:
                yield self.parse_definition(class_._nest(self.current.value))
            elif self.current.kind == tk.INDENT:
                self.consume(tk.INDENT)
                for definition in self.parse_definitions(class_):
                    yield definition
            elif self.current.kind == tk.DEDENT:
                self.consume(tk.DEDENT)
                return
            elif self.current.value == 'from':
                self.parse_from_import_statement()
            else:
                self.stream.move()

    def parse_dunder_all(self):
        """Parse the __all__ definition in a module.

        On success `self.dunder_all` holds the evaluated value; on any
        problem `self.dunder_all_error` is set to a description instead.
        """
        assert self.current.value == '__all__'
        self.consume(tk.NAME)
        # More than one __all__ definition means we ignore all __all__.
        if self.dunder_all is not None or self.dunder_all_error is not None:
            self.dunder_all = None
            self.dunder_all_error = 'Could not evaluate contents of __all__. '
            return
        if self.current.value != '=':
            self.dunder_all_error = 'Could not evaluate contents of __all__. '
            return
        self.consume(tk.OP)
        # Exact comparison: a substring test would also accept an empty
        # token value.
        if self.current.value not in ('(', '['):
            self.dunder_all_error = 'Could not evaluate contents of __all__. '
            return
        self.consume(tk.OP)

        dunder_all_content = "("
        while (self.current.kind != tk.OP or
                self.current.value not in (')', ']')):
            if self.current.kind in (tk.NL, tk.COMMENT):
                pass
            elif (self.current.kind == tk.STRING or
                    self.current.value == ','):
                dunder_all_content += self.current.value
            else:
                self.dunder_all_error = (
                    'Unexpected token kind in __all__: {!r}. '
                    .format(self.current.kind))
                return
            self.stream.move()
        self.consume(tk.OP)
        dunder_all_content += ")"
        try:
            # NOTE(security): this evaluates text taken from the file being
            # checked. Only STRING tokens and commas reach this point, but
            # consider a literal-only evaluator if inputs are untrusted.
            self.dunder_all = eval(dunder_all_content, {})
        except BaseException as e:
            self.dunder_all_error = (
                'Could not evaluate contents of __all__. '
                'The value was {}. The exception was:\n{}'
                .format(dunder_all_content, e))

        # Only comments may follow the closing bracket on the same logical
        # line. Each inspected token is consumed so that a trailing comment
        # cannot stall the loop, and the end of the file is accepted as a
        # terminator too.
        while (self.current is not None and
                self.current.kind not in self.stream.LOGICAL_NEWLINES and
                self.current.kind != tk.ENDMARKER):
            if self.current.kind != tk.COMMENT:
                self.dunder_all = None
                self.dunder_all_error = 'Could not evaluate contents of __all__. '
                return
            self.stream.move()

    def parse_module(self):
        """Parse a module (and its children) and return a Module object.

        A Package is returned instead when the filename ends with
        ``__init__.py``.
        """
        self.log.debug("parsing module.")
        start = self.line
        docstring = self.parse_docstring()
        children = list(self.parse_definitions(Module, dunder_all=True))
        assert self.current is None, self.current
        end = self.line
        cls = Module
        if self.filename.endswith('__init__.py'):
            cls = Package
        module = cls(self.filename, self.source, start, end,
                     [], docstring, children, None, self.dunder_all,
                     self.dunder_all_error, None, '')
        for child in module.children:
            child.parent = module
        module.future_imports = self.future_imports
        self.log.debug("finished parsing module.")
        return module

    def parse_definition(self, class_):
        """Parse a definition and return its value in a `class_` object."""
        start = self.line
        self.consume(tk.NAME)
        name = self.current.value
        self.log.debug("parsing %s '%s'", class_.__name__, name)
        self.stream.move()
        if self.current.kind == tk.OP and self.current.value == '(':
            # Skip the parameter list / base classes, tracking nesting.
            parenthesis_level = 0
            while True:
                if self.current.kind == tk.OP:
                    if self.current.value == '(':
                        parenthesis_level += 1
                    elif self.current.value == ')':
                        parenthesis_level -= 1
                        if parenthesis_level == 0:
                            break
                self.stream.move()
        if self.current.kind != tk.OP or self.current.value != ':':
            self.leapfrog(tk.OP, value=":")
        else:
            self.consume(tk.OP)
        if self.current.kind in (tk.NEWLINE, tk.COMMENT):
            skipped_error_codes = self.parse_skip_comment()
            self.leapfrog(tk.INDENT)
            assert self.current.kind != tk.INDENT
            docstring = self.parse_docstring()
            decorators = self._accumulated_decorators
            self.log.debug("current accumulated decorators: %s", decorators)
            self._accumulated_decorators = []
            self.log.debug("parsing nested definitions.")
            children = list(self.parse_definitions(class_))
            self.log.debug("finished parsing nested definitions for '%s'",
                           name)
            end = self.line - 1
        else:  # one-liner definition
            skipped_error_codes = ''
            docstring = self.parse_docstring()
            # Claim the pending decorators here as well; otherwise they
            # would be lost for this definition and wrongly attached to the
            # next multi-line definition.
            decorators = self._accumulated_decorators
            self.log.debug("current accumulated decorators: %s", decorators)
            self._accumulated_decorators = []
            children = []
            end = self.line
            self.leapfrog(tk.NEWLINE)
        definition = class_(name, self.source, start, end,
                            decorators, docstring, children, None,
                            skipped_error_codes)
        for child in definition.children:
            child.parent = definition
        self.log.debug("finished parsing %s '%s'. Next token is %r",
                       class_.__name__, name, self.current)
        return definition

    def parse_skip_comment(self):
        """Parse a definition comment for noqa skips.

        Return the text after ``noqa: ``, the string 'all' for a bare
        ``# noqa`` comment, or '' when the current token carries neither.
        """
        skipped_error_codes = ''
        if self.current.kind == tk.COMMENT:
            if 'noqa: ' in self.current.value:
                skipped_error_codes = ''.join(
                    self.current.value.split('noqa: ')[1:])
            elif self.current.value.startswith('# noqa'):
                skipped_error_codes = 'all'
        return skipped_error_codes

    def check_current(self, kind=None, value=None):
        """Verify the current token is of type `kind` and equals `value`.

        A criterion left as None is not checked.

        Raises:
            AssertionError: if the current token does not match.
        """
        # Compare against None rather than relying on truthiness: token
        # kinds are integers and tk.ENDMARKER is 0.
        kind_valid = kind is None or self.current.kind == kind
        value_valid = value is None or self.current.value == value
        if kind_valid and value_valid:
            return
        msg = textwrap.dedent("""
        Unexpected token at line {self.line}:

        In file: {self.filename}

        Got kind {self.current.kind!r}
        Got value {self.current.value}
        """.format(self=self))
        raise AssertionError(msg)

    def parse_from_import_statement(self):
        """Parse a 'from x import y' statement.

        The purpose is to find __future__ statements.

        """
        self.log.debug('parsing from/import statement.')
        is_future_import = self._parse_from_import_source()
        self._parse_from_import_names(is_future_import)

    def _parse_from_import_source(self):
        """Parse the 'from x import' part in a 'from x import y' statement.

        Return true iff `x` is __future__.
        """
        assert self.current.value == 'from', self.current.value
        self.stream.move()
        is_future_import = self.current.value == '__future__'
        self.stream.move()
        # Skip the rest of a dotted module path up to the 'import' keyword.
        while (self.current is not None and
               self.current.kind in (tk.DOT, tk.NAME, tk.OP) and
               self.current.value != 'import'):
            self.stream.move()
        if self.current is None or self.current.value != 'import':
            return False
        self.check_current(value='import')
        assert self.current.value == 'import', self.current.value
        self.stream.move()
        return is_future_import

    def _parse_from_import_names(self, is_future_import):
        """Parse the 'y' part in a 'from x import y' statement.

        When `is_future_import` is true, every imported name is added to
        `self.future_imports`.
        """
        if self.current.value == '(':
            self.consume(tk.OP)
            # Commas are consumed inside the loop, so the next OP token seen
            # by the loop condition ends the parenthesised list.
            expected_end_kinds = (tk.OP, )
        else:
            expected_end_kinds = (tk.NEWLINE, tk.ENDMARKER)
        while self.current.kind not in expected_end_kinds and not (
                self.current.kind == tk.OP and self.current.value == ';'):
            if self.current.kind != tk.NAME:
                self.stream.move()
                continue
            self.log.debug("parsing import, token is %r (%s)",
                           self.current.kind, self.current.value)
            if is_future_import:
                self.log.debug('found future import: %s', self.current.value)
                self.future_imports.add(self.current.value)
            self.consume(tk.NAME)
            self.log.debug("parsing import, token is %r (%s)",
                           self.current.kind, self.current.value)
            if self.current.kind == tk.NAME and self.current.value == 'as':
                self.consume(tk.NAME)  # as
                if self.current.kind == tk.NAME:
                    self.consume(tk.NAME)  # new name, irrelevant
            if self.current.value == ',':
                self.consume(tk.OP)
            self.log.debug("parsing import, token is %r (%s)",
                           self.current.kind, self.current.value)