ansible-doctor/ansibledoctor/annotation.py

#!/usr/bin/env python3
"""Find and parse annotations to AnnotationItem objects."""

import json
import re
from collections import defaultdict

import anyconfig

from ansibledoctor.config import SingleConfig
from ansibledoctor.utils import SingleLog, _split_string


class AnnotationItem:
    """Container for the data parsed from a single annotation line.

    ``data`` maps an annotation key to a dict of ``{subtype: content}``.
    """

    # next time improve this by looping over public available attributes
    def __init__(self):
        self.data = defaultdict(dict)

    def __str__(self):
        """Beautify object string output.

        Only the first ``key: subtype`` pair is shown. An item without any
        pair is rendered as ``AnnotationItem()`` so that ``str()`` always
        gets a string back instead of ``None``.
        """
        for key, subs in self.data.items():
            for sub in subs:
                return f"AnnotationItem({key}: {sub})"
        return "AnnotationItem()"

    def get_obj(self):
        """Return the underlying ``data`` mapping (not a copy)."""
        return self.data


class Annotation:
    """Collect every occurrence of one annotation type from the registered files.

    On construction the definition for ``name`` is looked up in the
    configuration. If one exists, every file returned by
    ``files_registry.get_files()`` is scanned for ``# @<name> ...`` comment
    lines and the parsed results are stored. Use :meth:`get_details` to read
    them.
    """

    # Values announcing that the annotation body continues on the following
    # comment lines; "$>" additionally marks that body as JSON.
    _MULTILINE_MARKERS = (">", "$>")

    def __init__(self, name, files_registry):
        """
        Look up the annotation definition and scan the files for it.

        :param name: annotation name to look up in the configured definitions
        :param files_registry: object providing ``get_files()``, an iterable of
            file paths to scan
        """
        self._all_items = defaultdict(dict)
        self._file_handler = None
        self.config = SingleConfig()
        self.log = SingleLog()
        self.logger = self.log.logger
        self._files_registry = files_registry

        self._all_annotations = self.config.get_annotations_definition()

        # Always define the attribute: for an unknown name it used to stay
        # unset and the check below raised AttributeError.
        self._annotation_definition = None
        if name in self._all_annotations:
            self._annotation_definition = self._all_annotations[name]

        if self._annotation_definition is not None:
            self._find_annotation()

    def get_details(self):
        """Return the collected items.

        Each key maps to a list of values when the annotation allows
        multiple entries, otherwise to a single merged dict.
        """
        return self._all_items

    def _find_annotation(self):
        """Scan all registered files and store every matching annotation line."""
        name = self._annotation_definition["name"]
        # The name is escaped so it is always matched literally.
        pattern = re.compile(r"(\#\ *\@" + re.escape(name) + r"\ +.*)")

        for rfile in self._files_registry.get_files():
            with open(rfile, encoding="utf8") as self._file_handler:
                # Lines are pulled with readline() instead of iterating the
                # file object: the multi-line reader calls tell()/seek() on
                # this handle, and tell() is disabled during file iteration.
                lines = iter(self._file_handler.readline, "")
                for num, line in enumerate(lines, start=1):
                    if not pattern.match(line.strip()):
                        continue

                    item = self._get_annotation_data(num, line, name, rfile)
                    if item:
                        self.logger.info(str(item))
                        self._populate_item(item.get_obj().items(), name)

    def _populate_item(self, item, name):
        """
        Store parsed ``(key, value)`` pairs in the collected items.

        :param item: iterable of ``(key, value)`` pairs of one parsed annotation
        :param name: annotation name, used to read its ``allow_multiple`` setting
        """
        allow_multiple = self.config.ANNOTATIONS.get(name)["allow_multiple"]

        for key, value in item:
            if allow_multiple:
                # keep every occurrence
                self._all_items.setdefault(key, []).append(value)
                continue

            try:
                # single-valued annotation: merge into the dict stored so far
                anyconfig.merge(self._all_items[key], value, ac_merge=anyconfig.MS_DICTS)
            except ValueError as e:
                self.log.sysexit_with_message(f"Unable to merge annotation values:\n{e}")

    def _get_annotation_data(self, num, line, name, rfile):
        """
        Make some string conversion on a line in order to get the relevant data.

        The text after the ``# @<name>`` marker is split on ``:`` into
        ``key``, an optional subtype and the value; without a subtype the
        subtype ``value`` is used. A value of ``>`` or ``$>`` means the content
        is read from the following comment lines, and a value starting with
        ``$`` is parsed as JSON.

        :param num: line number of ``line`` in ``rfile``, used in error messages
        :param line: the raw annotation line
        :param name: annotation name
        :param rfile: path of the file being read, used in error messages
        :returns: an ``AnnotationItem``, or ``None`` if the line has no value
            or uses a subtype that is not configured for this annotation
        """
        # step1 remove the annotation marker
        marker = r"(\#\ *\@" + re.escape(name) + r"\ *)"
        stripped = re.sub(marker, "", line).strip()

        # step2 split into key, optional subtype and value
        parts = [part.strip() for part in _split_string(stripped, ":", "\\", 2)]
        if len(parts) < 2:
            return None

        if len(parts) == 2:
            # "<key>: <value>" form, use the implicit "value" subtype
            parts = [parts[0], "value", parts[1]]

        key, subtype, raw = str(parts[0]), parts[1], parts[2]

        subtypes = self.config.ANNOTATIONS.get(name)["subtypes"]
        if subtypes and subtype not in subtypes:
            return None

        # step3 resolve the content
        if raw in self._MULTILINE_MARKERS:
            content = self._read_multiline()
            if raw.startswith("$"):
                source = "".join(x.strip() for x in content)
                content = self._str_to_json(key, source, rfile, num, line)
        elif raw.startswith("$"):
            source = self._strip_json_marker(raw)
            content = self._str_to_json(key, source, rfile, num, line)
        else:
            content = [raw]

        item = AnnotationItem()
        item.data[key] = {subtype: content}
        return item

    @staticmethod
    def _strip_json_marker(value):
        """Return ``value`` without its leading ``$`` JSON marker.

        Only the marker itself is dropped; ``$`` characters inside the JSON
        payload are preserved.
        """
        if value.startswith("$"):
            value = value[1:]
        return value.strip()

    def _read_multiline(self):
        """Read the comment lines that follow the current annotation line.

        Reading stops at end of file, at a blank line, at the next annotation
        or at the first line that is not a comment. The file position is
        restored afterwards, so the caller's line loop sees these lines again.

        :returns: list of text fragments, one per non-empty comment line
        """
        annotation_start = re.compile(r"(\#\ *[\@][\w]+)")
        start_position = self._file_handler.tell()
        collected = []
        before = ""

        while True:
            next_line = self._file_handler.readline().lstrip()
            comment = next_line.strip()

            if (
                not comment
                or annotation_start.match(next_line)
                or not comment.startswith("#")
            ):
                break

            # drop only the first hash and at most one following space
            final = comment[1:]
            if final[:1] == " ":
                final = final[1:]
            final = before + final

            # an empty comment line becomes a paragraph break in front of
            # the next text line
            test_line = next_line.replace("#", "").strip()
            if not test_line:
                before = "\n\n"
                continue
            before = ""

            # a trailing backslash forces a line break
            if test_line.endswith("\\"):
                final = final.rstrip("\\").strip()
                after = "\n"
            else:
                after = ""

            collected.append(final + after)

        self._file_handler.seek(start_position)
        return collected

    def _str_to_json(self, key, string, rfile, num, line):
        """
        Parse ``string`` as JSON and wrap the result as ``{key: parsed}``.

        On invalid JSON the error is handed to ``sysexit_with_message`` of the
        logger, naming the file, line number and annotation line.
        """
        try:
            return {key: json.loads(string)}
        except ValueError:
            self.log.sysexit_with_message(
                f"Json value error: Can't parse json in {rfile}:{num}:\n{line.strip()}"
            )
fork; initial commit 2019-10-07 06:52:00 +00:00			`#!/usr/bin/env python3`
add missing docstrings 2019-10-08 09:39:27 +00:00			`"""Find and parse annotations to AnnotationItem objects."""`
fork; initial commit 2019-10-07 06:52:00 +00:00
			`import json`
			`import re`
			`from collections import defaultdict`

			`import anyconfig`

refactor: rename modules to reflect pep8 recommendations (#27) 2021-01-01 12:50:41 +00:00			`from ansibledoctor.config import SingleConfig`
fix: use custom method for annotation split to allow escape chars (#432) 2023-01-22 14:02:09 +00:00			`from ansibledoctor.utils import SingleLog, _split_string`
fork; initial commit 2019-10-07 06:52:00 +00:00

			`class AnnotationItem:`
add yapf as formatter 2020-04-05 21:16:53 +00:00			`"""Handle annotations."""`
fork; initial commit 2019-10-07 06:52:00 +00:00
			`# next time improve this by looping over public available attributes`
			`def __init__(self):`
			`self.data = defaultdict(dict)`

refactor logging anf configuration handling 2019-10-07 12:44:45 +00:00			`def __str__(self):`
add missing docstrings 2019-10-08 09:39:27 +00:00			`"""Beautify object string output."""`
refctor: migrate flake8 to ruff python linter (#429) 2023-01-20 10:56:12 +00:00			`for key in self.data:`
refactor logging anf configuration handling 2019-10-07 12:44:45 +00:00			`for sub in self.data.get(key):`
refctor: migrate flake8 to ruff python linter (#429) 2023-01-20 10:56:12 +00:00			`return f"AnnotationItem({key}: {sub})"`
refactor logging anf configuration handling 2019-10-07 12:44:45 +00:00
fork; initial commit 2019-10-07 06:52:00 +00:00			`def get_obj(self):`
			`return self.data`


			`class Annotation:`
add yapf as formatter 2020-04-05 21:16:53 +00:00			`"""Handle annotations."""`

fork; initial commit 2019-10-07 06:52:00 +00:00			`def __init__(self, name, files_registry):`
			`self._all_items = defaultdict(dict)`
			`self._file_handler = None`
			`self.config = SingleConfig()`
use an explicit marker symbol to value2json conversion 2019-10-15 07:54:03 +00:00			`self.log = SingleLog()`
			`self.logger = self.log.logger`
fork; initial commit 2019-10-07 06:52:00 +00:00			`self._files_registry = files_registry`

			`self._all_annotations = self.config.get_annotations_definition()`

refctor: migrate flake8 to ruff python linter (#429) 2023-01-20 10:56:12 +00:00			`if name in self._all_annotations:`
fork; initial commit 2019-10-07 06:52:00 +00:00			`self._annotation_definition = self._all_annotations[name]`

			`if self._annotation_definition is not None:`
			`self._find_annotation()`

			`def get_details(self):`
			`return self._all_items`

			`def _find_annotation(self):`
			`regex = r"(\#\ \@" + self._annotation_definition["name"] + r"\ +.)"`
			`for rfile in self._files_registry.get_files():`
refctor: migrate flake8 to ruff python linter (#429) 2023-01-20 10:56:12 +00:00			`with open(rfile, encoding="utf8") as self._file_handler:`
			`num = 1`
			`while True:`
			`line = self._file_handler.readline()`
			`if not line:`
			`break`

			`if re.match(regex, line.strip()):`
			`item = self._get_annotation_data(`
			`num, line, self._annotation_definition["name"], rfile`
feat: implement todo identifiers (#246) 2022-02-21 20:38:47 +00:00			`)`
refctor: migrate flake8 to ruff python linter (#429) 2023-01-20 10:56:12 +00:00			`if item:`
			`self.logger.info(str(item))`
			`self._populate_item(`
			`item.get_obj().items(), self._annotation_definition["name"]`
			`)`
			`num += 1`
fork; initial commit 2019-10-07 06:52:00 +00:00
feat: implement todo identifiers (#246) 2022-02-21 20:38:47 +00:00			`def _populate_item(self, item, name):`
			`allow_multiple = self.config.ANNOTATIONS.get(name)["allow_multiple"]`

fork; initial commit 2019-10-07 06:52:00 +00:00			`for key, value in item:`
feat: implement todo identifiers (#246) 2022-02-21 20:38:47 +00:00			`if allow_multiple:`
			`if key not in self._all_items:`
			`self._all_items[key] = []`
			`self._all_items[key].append(value)`
			`else:`
			`try:`
			`anyconfig.merge(self._all_items[key], value, ac_merge=anyconfig.MS_DICTS)`
			`except ValueError as e:`
refctor: migrate flake8 to ruff python linter (#429) 2023-01-20 10:56:12 +00:00			`self.log.sysexit_with_message(f"Unable to merge annotation values:\n{e}")`
fork; initial commit 2019-10-07 06:52:00 +00:00
use an explicit marker symbol to value2json conversion 2019-10-15 07:54:03 +00:00			`def _get_annotation_data(self, num, line, name, rfile):`
fork; initial commit 2019-10-07 06:52:00 +00:00			`"""`
			`Make some string conversion on a line in order to get the relevant data.`

			`:param line:`
			`"""`
			`item = AnnotationItem()`

			`# step1 remove the annotation`
			`reg1 = r"(\#\ \@" + name + r"\ )"`
			`line1 = re.sub(reg1, "", line).strip()`

			`# step3 take the main key value from the annotation`
fix: use custom method for annotation split to allow escape chars (#432) 2023-01-22 14:02:09 +00:00			`parts = [part.strip() for part in _split_string(line1, ":", "\\", 2)]`
fork; initial commit 2019-10-07 06:52:00 +00:00			`key = str(parts[0])`
			`item.data[key] = {}`
use an explicit marker symbol to value2json conversion 2019-10-15 07:54:03 +00:00			`multiline_char = [">", "$>"]`
fork; initial commit 2019-10-07 06:52:00 +00:00
			`if len(parts) < 2:`
refctor: migrate flake8 to ruff python linter (#429) 2023-01-20 10:56:12 +00:00			`return None`
fork; initial commit 2019-10-07 06:52:00 +00:00
			`if len(parts) == 2:`
			`parts = parts[:1] + ["value"] + parts[1:]`

use an explicit marker symbol to value2json conversion 2019-10-15 07:54:03 +00:00			`subtypes = self.config.ANNOTATIONS.get(name)["subtypes"]`
			`if subtypes and parts[1] not in subtypes:`
refctor: migrate flake8 to ruff python linter (#429) 2023-01-20 10:56:12 +00:00			`return None`
use an explicit marker symbol to value2json conversion 2019-10-15 07:54:03 +00:00
			`content = [parts[2]]`

			`if parts[2] not in multiline_char and parts[2].startswith("$"):`
			`source = parts[2].replace("$", "").strip()`
			`content = self._str_to_json(key, source, rfile, num, line)`
fork; initial commit 2019-10-07 06:52:00 +00:00
			`item.data[key][parts[1]] = content`

			`# step4 check for multiline description`
			`if parts[2] in multiline_char:`
			`multiline = []`
			`stars_with_annotation = r"(\#\ *[\@][\w]+)"`
			`current_file_position = self._file_handler.tell()`
fix: allow to force line breaks using '\' in multiline strings (#255) 2022-03-02 13:33:37 +00:00			`before = ""`
			`after = ""`
fork; initial commit 2019-10-07 06:52:00 +00:00
			`while True:`
fix newline handling 2019-12-02 14:00:32 +00:00			`next_line = self._file_handler.readline().lstrip()`
fork; initial commit 2019-10-07 06:52:00 +00:00
			`if not next_line.strip():`
			`self._file_handler.seek(current_file_position)`
			`break`

			`# match if annotation in line`
			`if re.match(stars_with_annotation, next_line):`
			`self._file_handler.seek(current_file_position)`
			`break`
use an explicit marker symbol to value2json conversion 2019-10-15 07:54:03 +00:00
fork; initial commit 2019-10-07 06:52:00 +00:00			`# match if does not start with comment`
			`test_line2 = next_line.strip()`
			`if test_line2[:1] != "#":`
			`self._file_handler.seek(current_file_position)`
			`break`

remove only first hashtag from a multiline comment 2020-03-04 15:59:02 +00:00			`final = re.findall(r"\#(.*)", next_line)[0].rstrip()`
fix newline handling 2019-12-02 15:44:57 +00:00			`if final[:1] == " ":`
			`final = final[1:]`
fix: allow to force line breaks using '\' in multiline strings (#255) 2022-03-02 13:33:37 +00:00			`final = before + final`
fix newline handling 2019-12-02 15:44:57 +00:00
fix newline handling 2019-12-02 14:00:32 +00:00			`# match if empty line or commented empty line`
			`test_line = next_line.replace("#", "").strip()`
			`if len(test_line) == 0:`
fix: allow to force line breaks using '\' in multiline strings (#255) 2022-03-02 13:33:37 +00:00			`before = "\n\n"`
fix newline handling 2019-12-02 14:00:32 +00:00			`continue`
refctor: migrate flake8 to ruff python linter (#429) 2023-01-20 10:56:12 +00:00			`before = ""`
fix newline handling 2019-12-02 15:44:57 +00:00
fix: allow to force line breaks using '\' in multiline strings (#255) 2022-03-02 13:33:37 +00:00			`if test_line.endswith("\\"):`
			`final = final.rstrip("\\").strip()`
			`after = "\n"`
			`else:`
			`after = ""`

			`multiline.append(before + final + after)`
fork; initial commit 2019-10-07 06:52:00 +00:00
use an explicit marker symbol to value2json conversion 2019-10-15 07:54:03 +00:00			`if parts[2].startswith("$"):`
			`source = "".join([x.strip() for x in multiline])`
			`multiline = self._str_to_json(key, source, rfile, num, line)`
fork; initial commit 2019-10-07 06:52:00 +00:00
use an explicit marker symbol to value2json conversion 2019-10-15 07:54:03 +00:00			`item.data[key][parts[1]] = multiline`
fork; initial commit 2019-10-07 06:52:00 +00:00			`return item`
use an explicit marker symbol to value2json conversion 2019-10-15 07:54:03 +00:00
			`def _str_to_json(self, key, string, rfile, num, line):`
			`try:`
			`return {key: json.loads(string)}`
			`except ValueError:`
			`self.log.sysexit_with_message(`
add yapf as formatter 2020-04-05 21:16:53 +00:00			`"Json value error: Can't parse json in {}:{}:\n{}".format(`
			`rfile, str(num), line.strip()`
			`)`
			`)`