"""Wordlists loaded from package data. We can treat them as part of the code for the imperative mood check, and therefore we load them at import time, rather than on-demand. """ import re import pkgutil import snowballstemmer #: Regular expression for stripping comments from the wordlists COMMENT_RE = re.compile(r'\s*#.*') #: Stemmer function for stemming words in English stem = snowballstemmer.stemmer('english').stemWord def load_wordlist(name): """Iterate over lines of a wordlist data file. `name` should be the name of a package data file within the data/ directory. Whitespace and #-prefixed comments are stripped from each line. """ text = pkgutil.get_data('pydocstyle', 'data/' + name).decode('utf8') for line in text.splitlines(): line = COMMENT_RE.sub('', line).strip() if line: yield line #: A dict mapping stemmed verbs to the imperative form IMPERATIVE_VERBS = {stem(v): v for v in load_wordlist('imperatives.txt')} #: Words that are forbidden to appear as the first word in a docstring IMPERATIVE_BLACKLIST = set(load_wordlist('imperatives_blacklist.txt'))