ansible-later/env_27/lib/python2.7/site-packages/pydocstyle/parser.py

654 lines
24 KiB
Python
Raw Normal View History

2019-04-11 15:56:20 +02:00
"""Python code parser."""
import six
import textwrap
import tokenize as tk
from itertools import chain, dropwhile
from re import compile as re
from .utils import log
try:
from StringIO import StringIO
except ImportError: # Python 3.0 and later
from io import StringIO
try:
next
except NameError: # Python 2.5 and earlier
nothing = object()
def next(obj, default=nothing):
if default == nothing:
return obj.next()
else:
try:
return obj.next()
except StopIteration:
return default
__all__ = ('Parser', 'Definition', 'Module', 'Package', 'Function',
'NestedFunction', 'Method', 'Class', 'NestedClass', 'AllError',
'StringIO', 'ParseError')
class ParseError(Exception):
def __str__(self):
return "Cannot parse file."
class UnexpectedTokenError(ParseError):
def __init__(self, token, expected_kind):
self.token = token
self.expected_kind = expected_kind
def __str__(self):
return "Unexpected token {}, expected {}".format(
self.token, self.expected_kind)
def humanize(string):
return re(r'(.)([A-Z]+)').sub(r'\1 \2', string).lower()
class Value(object):
"""A generic object with a list of preset fields."""
def __init__(self, *args):
if len(self._fields) != len(args):
raise ValueError('got {} arguments for {} fields for {}: {}'
.format(len(args), len(self._fields),
self.__class__.__name__, self._fields))
vars(self).update(zip(self._fields, args))
def __hash__(self):
return hash(repr(self))
def __eq__(self, other):
return other and vars(self) == vars(other)
def __repr__(self):
kwargs = ', '.join('{}={!r}'.format(field, getattr(self, field))
for field in self._fields)
return '{}({})'.format(self.__class__.__name__, kwargs)
class Definition(Value):
"""A Python source code definition (could be class, function, etc)."""
_fields = ('name', '_source', 'start', 'end', 'decorators', 'docstring',
'children', 'parent', 'skipped_error_codes')
_human = property(lambda self: humanize(type(self).__name__))
kind = property(lambda self: self._human.split()[-1])
module = property(lambda self: self.parent.module)
dunder_all = property(lambda self: self.module.dunder_all)
_slice = property(lambda self: slice(self.start - 1, self.end))
is_class = False
def __iter__(self):
return chain([self], *self.children)
@property
def error_lineno(self):
"""Get the line number with which to report violations."""
if isinstance(self.docstring, Docstring):
return self.docstring.start
return self.start
@property
def _publicity(self):
return {True: 'public', False: 'private'}[self.is_public]
@property
def source(self):
"""Return the source code for the definition."""
full_src = self._source[self._slice]
def is_empty_or_comment(line):
return line.strip() == '' or line.strip().startswith('#')
filtered_src = dropwhile(is_empty_or_comment, reversed(full_src))
return ''.join(reversed(list(filtered_src)))
def __str__(self):
out = 'in {} {} `{}`'.format(self._publicity, self._human, self.name)
if self.skipped_error_codes:
out += ' (skipping {})'.format(self.skipped_error_codes)
return out
class Module(Definition):
"""A Python source code module."""
_fields = ('name', '_source', 'start', 'end', 'decorators', 'docstring',
'children', 'parent', '_dunder_all', 'dunder_all_error',
'future_imports', 'skipped_error_codes')
_nest = staticmethod(lambda s: {'def': Function, 'class': Class}[s])
module = property(lambda self: self)
dunder_all = property(lambda self: self._dunder_all)
@property
def is_public(self):
return not self.name.startswith('_') or self.name.startswith('__')
def __str__(self):
return 'at module level'
class Package(Module):
"""A package is a __init__.py module."""
class Function(Definition):
"""A Python source code function."""
_nest = staticmethod(lambda s: {'def': NestedFunction,
'class': NestedClass}[s])
@property
def is_public(self):
"""Return True iff this function should be considered public."""
if self.dunder_all is not None:
return self.name in self.dunder_all
else:
return not self.name.startswith('_')
@property
def is_test(self):
"""Return True if this function is a test function/method.
We exclude tests from the imperative mood check, because to phrase
their docstring in the imperative mood, they would have to start with
a highly redundant "Test that ...".
"""
return self.name.startswith('test') or self.name == 'runTest'
class NestedFunction(Function):
"""A Python source code nested function."""
is_public = False
class Method(Function):
"""A Python source code method."""
@property
def is_magic(self):
"""Return True iff this method is a magic method (e.g., `__str__`)."""
return (self.name.startswith('__') and
self.name.endswith('__') and
self.name not in VARIADIC_MAGIC_METHODS)
@property
def is_init(self):
"""Return True iff this method is `__init__`."""
return self.name == '__init__'
@property
def is_public(self):
"""Return True iff this method should be considered public."""
# Check if we are a setter/deleter method, and mark as private if so.
for decorator in self.decorators:
# Given 'foo', match 'foo.bar' but not 'foobar' or 'sfoo'
if re(r"^{}\.".format(self.name)).match(decorator.name):
return False
name_is_public = (not self.name.startswith('_') or
self.name in VARIADIC_MAGIC_METHODS or
self.is_magic)
return self.parent.is_public and name_is_public
class Class(Definition):
"""A Python source code class."""
_nest = staticmethod(lambda s: {'def': Method, 'class': NestedClass}[s])
is_public = Function.is_public
is_class = True
class NestedClass(Class):
"""A Python source code nested class."""
@property
def is_public(self):
"""Return True iff this class should be considered public."""
return (not self.name.startswith('_') and
self.parent.is_class and
self.parent.is_public)
class Decorator(Value):
"""A decorator for function, method or class."""
_fields = 'name arguments'.split()
class Docstring(str):
"""Represent a docstring.
This is a string, but has additional start/end attributes representing
the start and end of the token.
"""
def __new__(cls, v, start, end):
return str.__new__(cls, v)
def __init__(self, v, start, end):
self.start = start
self.end = end
VARIADIC_MAGIC_METHODS = ('__init__', '__call__', '__new__')
class AllError(Exception):
"""Raised when there is a problem with __all__ when parsing."""
def __init__(self, message):
"""Initialize the error with a more specific message."""
Exception.__init__(
self, message + textwrap.dedent("""
That means pydocstyle cannot decide which definitions are
public. Variable __all__ should be present at most once in
each file, in form
`__all__ = ('a_public_function', 'APublicClass', ...)`.
More info on __all__: http://stackoverflow.com/q/44834/. ')
"""))
class TokenStream(object):
# A logical newline is where a new expression or statement begins. When
# there is a physical new line, but not a logical one, for example:
# (x +
# y)
# The token will be tk.NL, not tk.NEWLINE.
LOGICAL_NEWLINES = {tk.NEWLINE, tk.INDENT, tk.DEDENT}
def __init__(self, filelike):
self._generator = tk.generate_tokens(filelike.readline)
self.current = Token(*next(self._generator, None))
self.line = self.current.start[0]
self.log = log
self.got_logical_newline = True
def move(self):
previous = self.current
current = self._next_from_generator()
self.current = None if current is None else Token(*current)
self.line = self.current.start[0] if self.current else self.line
self.got_logical_newline = (previous.kind in self.LOGICAL_NEWLINES)
return previous
def _next_from_generator(self):
try:
return next(self._generator, None)
except (SyntaxError, tk.TokenError):
self.log.warning('error generating tokens', exc_info=True)
return None
def __iter__(self):
while True:
if self.current is not None:
yield self.current
else:
return
self.move()
class TokenKind(int):
def __repr__(self):
return "tk.{}".format(tk.tok_name[self])
class Token(Value):
_fields = 'kind value start end source'.split()
def __init__(self, *args):
super(Token, self).__init__(*args)
self.kind = TokenKind(self.kind)
def __str__(self):
return "{!r} ({})".format(self.kind, self.value)
class Parser(object):
"""A Python source code parser."""
def parse(self, filelike, filename):
"""Parse the given file-like object and return its Module object."""
self.log = log
self.source = filelike.readlines()
src = ''.join(self.source)
try:
compile(src, filename, 'exec')
except SyntaxError as error:
six.raise_from(ParseError(), error)
self.stream = TokenStream(StringIO(src))
self.filename = filename
self.dunder_all = None
self.dunder_all_error = None
self.future_imports = set()
self._accumulated_decorators = []
return self.parse_module()
# TODO: remove
def __call__(self, *args, **kwargs):
"""Call the parse method."""
return self.parse(*args, **kwargs)
current = property(lambda self: self.stream.current)
line = property(lambda self: self.stream.line)
def consume(self, kind):
"""Consume one token and verify it is of the expected kind."""
next_token = self.stream.move()
if next_token.kind != kind:
raise UnexpectedTokenError(token=next_token, expected_kind=kind)
def leapfrog(self, kind, value=None):
"""Skip tokens in the stream until a certain token kind is reached.
If `value` is specified, tokens whose values are different will also
be skipped.
"""
while self.current is not None:
if (self.current.kind == kind and
(value is None or self.current.value == value)):
self.consume(kind)
return
self.stream.move()
def parse_docstring(self):
"""Parse a single docstring and return its value."""
self.log.debug("parsing docstring, token is %s", self.current)
while self.current.kind in (tk.COMMENT, tk.NEWLINE, tk.NL):
self.stream.move()
self.log.debug("parsing docstring, token is %r (%s)",
self.current.kind, self.current.value)
if self.current.kind == tk.STRING:
docstring = Docstring(
self.current.value,
self.current.start[0],
self.current.end[0]
)
self.stream.move()
return docstring
return None
def parse_decorators(self):
"""Called after first @ is found.
Parse decorators into self._accumulated_decorators.
Continue to do so until encountering the 'def' or 'class' start token.
"""
name = []
arguments = []
at_arguments = False
while self.current is not None:
self.log.debug("parsing decorators, current token is %r (%s)",
self.current.kind, self.current.value)
if (self.current.kind == tk.NAME and
self.current.value in ['def', 'class']):
# Done with decorators - found function or class proper
break
elif self.current.kind == tk.OP and self.current.value == '@':
# New decorator found. Store the decorator accumulated so far:
self._accumulated_decorators.append(
Decorator(''.join(name), ''.join(arguments)))
# Now reset to begin accumulating the new decorator:
name = []
arguments = []
at_arguments = False
elif self.current.kind == tk.OP and self.current.value == '(':
at_arguments = True
elif self.current.kind == tk.OP and self.current.value == ')':
# Ignore close parenthesis
pass
elif self.current.kind == tk.NEWLINE or self.current.kind == tk.NL:
# Ignore newlines
pass
else:
# Keep accumulating current decorator's name or argument.
if not at_arguments:
name.append(self.current.value)
else:
arguments.append(self.current.value)
self.stream.move()
# Add decorator accumulated so far
self._accumulated_decorators.append(
Decorator(''.join(name), ''.join(arguments)))
def parse_definitions(self, class_, dunder_all=False):
"""Parse multiple definitions and yield them."""
while self.current is not None:
self.log.debug("parsing definition list, current token is %r (%s)",
self.current.kind, self.current.value)
self.log.debug('got_newline: %s', self.stream.got_logical_newline)
if dunder_all and self.current.value == '__all__':
self.parse_dunder_all()
elif (self.current.kind == tk.OP and
self.current.value == '@' and
self.stream.got_logical_newline):
self.consume(tk.OP)
self.parse_decorators()
elif self.current.value in ['def', 'class']:
yield self.parse_definition(class_._nest(self.current.value))
elif self.current.kind == tk.INDENT:
self.consume(tk.INDENT)
for definition in self.parse_definitions(class_):
yield definition
elif self.current.kind == tk.DEDENT:
self.consume(tk.DEDENT)
return
elif self.current.value == 'from':
self.parse_from_import_statement()
else:
self.stream.move()
def parse_dunder_all(self):
"""Parse the __all__ definition in a module."""
assert self.current.value == '__all__'
self.consume(tk.NAME)
# More than one __all__ definition means we ignore all __all__.
if self.dunder_all is not None or self.dunder_all_error is not None:
self.dunder_all = None
self.dunder_all_error = 'Could not evaluate contents of __all__. '
return
if self.current.value != '=':
self.dunder_all_error = 'Could not evaluate contents of __all__. '
return
self.consume(tk.OP)
if self.current.value not in '([':
self.dunder_all_error = 'Could not evaluate contents of __all__. '
return
self.consume(tk.OP)
dunder_all_content = "("
while self.current.kind != tk.OP or self.current.value not in ")]":
if self.current.kind in (tk.NL, tk.COMMENT):
pass
elif (self.current.kind == tk.STRING or
self.current.value == ','):
dunder_all_content += self.current.value
else:
self.dunder_all_error = (
'Unexpected token kind in __all__: {!r}. '
.format(self.current.kind))
return
self.stream.move()
self.consume(tk.OP)
dunder_all_content += ")"
try:
self.dunder_all = eval(dunder_all_content, {})
except BaseException as e:
self.dunder_all_error = (
'Could not evaluate contents of __all__.'
'\bThe value was {}. The exception was:\n{}'
.format(dunder_all_content, e))
while not self.current.kind in self.stream.LOGICAL_NEWLINES:
if self.current.kind != tk.COMMENT:
self.dunder_all = None
self.dunder_all_error = 'Could not evaluate contents of __all__. '
return
def parse_module(self):
"""Parse a module (and its children) and return a Module object."""
self.log.debug("parsing module.")
start = self.line
docstring = self.parse_docstring()
children = list(self.parse_definitions(Module, dunder_all=True))
assert self.current is None, self.current
end = self.line
cls = Module
if self.filename.endswith('__init__.py'):
cls = Package
module = cls(self.filename, self.source, start, end,
[], docstring, children, None, self.dunder_all,
self.dunder_all_error, None, '')
for child in module.children:
child.parent = module
module.future_imports = self.future_imports
self.log.debug("finished parsing module.")
return module
def parse_definition(self, class_):
"""Parse a definition and return its value in a `class_` object."""
start = self.line
self.consume(tk.NAME)
name = self.current.value
self.log.debug("parsing %s '%s'", class_.__name__, name)
self.stream.move()
if self.current.kind == tk.OP and self.current.value == '(':
parenthesis_level = 0
while True:
if self.current.kind == tk.OP:
if self.current.value == '(':
parenthesis_level += 1
elif self.current.value == ')':
parenthesis_level -= 1
if parenthesis_level == 0:
break
self.stream.move()
if self.current.kind != tk.OP or self.current.value != ':':
self.leapfrog(tk.OP, value=":")
else:
self.consume(tk.OP)
if self.current.kind in (tk.NEWLINE, tk.COMMENT):
skipped_error_codes = self.parse_skip_comment()
self.leapfrog(tk.INDENT)
assert self.current.kind != tk.INDENT
docstring = self.parse_docstring()
decorators = self._accumulated_decorators
self.log.debug("current accumulated decorators: %s", decorators)
self._accumulated_decorators = []
self.log.debug("parsing nested definitions.")
children = list(self.parse_definitions(class_))
self.log.debug("finished parsing nested definitions for '%s'",
name)
end = self.line - 1
else: # one-liner definition
skipped_error_codes = ''
docstring = self.parse_docstring()
decorators = [] # TODO
children = []
end = self.line
self.leapfrog(tk.NEWLINE)
definition = class_(name, self.source, start, end,
decorators, docstring, children, None,
skipped_error_codes)
for child in definition.children:
child.parent = definition
self.log.debug("finished parsing %s '%s'. Next token is %r",
class_.__name__, name, self.current)
return definition
def parse_skip_comment(self):
"""Parse a definition comment for noqa skips."""
skipped_error_codes = ''
if self.current.kind == tk.COMMENT:
if 'noqa: ' in self.current.value:
skipped_error_codes = ''.join(
self.current.value.split('noqa: ')[1:])
elif self.current.value.startswith('# noqa'):
skipped_error_codes = 'all'
return skipped_error_codes
def check_current(self, kind=None, value=None):
"""Verify the current token is of type `kind` and equals `value`."""
msg = textwrap.dedent("""
Unexpected token at line {self.line}:
In file: {self.filename}
Got kind {self.current.kind!r}
Got value {self.current.value}
""".format(self=self))
kind_valid = self.current.kind == kind if kind else True
value_valid = self.current.value == value if value else True
assert kind_valid and value_valid, msg
def parse_from_import_statement(self):
"""Parse a 'from x import y' statement.
The purpose is to find __future__ statements.
"""
self.log.debug('parsing from/import statement.')
is_future_import = self._parse_from_import_source()
self._parse_from_import_names(is_future_import)
def _parse_from_import_source(self):
"""Parse the 'from x import' part in a 'from x import y' statement.
Return true iff `x` is __future__.
"""
assert self.current.value == 'from', self.current.value
self.stream.move()
is_future_import = self.current.value == '__future__'
self.stream.move()
while (self.current is not None and
self.current.kind in (tk.DOT, tk.NAME, tk.OP) and
self.current.value != 'import'):
self.stream.move()
if self.current is None or self.current.value != 'import':
return False
self.check_current(value='import')
assert self.current.value == 'import', self.current.value
self.stream.move()
return is_future_import
def _parse_from_import_names(self, is_future_import):
"""Parse the 'y' part in a 'from x import y' statement."""
if self.current.value == '(':
self.consume(tk.OP)
expected_end_kinds = (tk.OP, )
else:
expected_end_kinds = (tk.NEWLINE, tk.ENDMARKER)
while self.current.kind not in expected_end_kinds and not (
self.current.kind == tk.OP and self.current.value == ';'):
if self.current.kind != tk.NAME:
self.stream.move()
continue
self.log.debug("parsing import, token is %r (%s)",
self.current.kind, self.current.value)
if is_future_import:
self.log.debug('found future import: %s', self.current.value)
self.future_imports.add(self.current.value)
self.consume(tk.NAME)
self.log.debug("parsing import, token is %r (%s)",
self.current.kind, self.current.value)
if self.current.kind == tk.NAME and self.current.value == 'as':
self.consume(tk.NAME) # as
if self.current.kind == tk.NAME:
self.consume(tk.NAME) # new name, irrelevant
if self.current.value == ',':
self.consume(tk.OP)
self.log.debug("parsing import, token is %r (%s)",
self.current.kind, self.current.value)