Source code for jube2.util.yaml_converter

# JUBE Benchmarking Environment
# Copyright (C) 2008-2024
# Forschungszentrum Juelich GmbH, Juelich Supercomputing Centre
# http://www.fz-juelich.de/jsc/jube
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""YAML to XML converter"""

from __future__ import (print_function,
                        unicode_literals,
                        division)

import xml.etree.ElementTree as etree
import xml.dom.minidom as DOM
try:
    import ruamel.yaml
except ImportError:
    pass
try:
    import yaml
except ImportError:
    pass
import jube2.log
import jube2.conf
import jube2.util.output
import os
import copy
import jube2.util.util
try:
    from StringIO import StringIO as IOStream
except ImportError:
    from io import BytesIO as IOStream

LOGGER = jube2.log.get_logger(__name__)


[docs]class YAML_Converter(object):

    """YAML to XML converter"""

    allowed_tags = \
        {"/": ["benchmark", "parameterset", "comment", "step",
               "fileset", "substituteset", "analyser", "result", "patternset",
               "selection", "include-path", "check_tags"],
         "/benchmark": ["benchmark", "parameterset", "fileset",
                        "substituteset", "patternset", "selection",
                        "include-path", "check_tags"],
         "benchmark": ["parameterset", "comment", "step", "fileset",
                       "substituteset", "analyser", "result", "patternset"],
         "analyse": ["file"], "analyser": ["use", "analyse"],
         "fileset": ["link", "copy", "prepare"],
         "include-path": ["path"], "parameterset": ["parameter"],
         "patternset": ["pattern"], "result": ["use", "table", "syslog", "database"],
         "selection": ["not", "only", "tag"], "step": ["use", "do"],
         "substituteset": ["iofile", "sub"], "syslog": ["key"],
         "table": ["column"], "database": ["key"]}

    def __init__(self, path, include_path=None, tags=None):
        self._path = path
        if include_path is None:
            include_path = []
        if tags is None:
            tags = set()
        self._include_path = list(include_path)
        self._include_path += [os.path.dirname(self._path)]
        self._tags = set(tags)
        try:
            yaml.add_constructor("!include", self.__yaml_include)
        except NameError:
            raise NameError("yaml module not available; either install it " +
                            "(https://pyyaml.org), or switch to .xml input " +
                            "files.")
        self._ignore_search_errors = True
        self._tags.update(self.__search_for_tags())
        old_tags = set(self._tags)
        changed = True
        counter = 0
        # It is possible to add new tags by including external files into a
        # selection block therefore the input must be scanned multiple times
        # to gather all available tags
        while changed and counter < jube2.conf.PREPROCESS_MAX_ITERATION:
            self._include_path = list(include_path) + \
                self.__search_for_include_pathes() + \
                [os.path.dirname(self._path)]
            self._tags.update(self.__search_for_tags())
            changed = len(self._tags.difference(old_tags)) > 0
            old_tags = set(self._tags)
            counter += 1
        self._ignore_search_errors = False
        self._int_file = IOStream()
        self.__convert()

    def __convert(self):
        """ Opens given file, make a Tree of it and print it """
        # Check the validity of the yaml file
        with open(self._path, "r") as file_handle:
            try:
                ruamel.yaml.YAML().load(file_handle)
            except NameError:
                pass
            except ruamel.yaml.constructor.DuplicateKeyError as e:
                e.note=""
                raise(e)

        LOGGER.debug("  Start YAML to XML file conversion for file {0}".format(
            self._path))

        # Read the yaml file and create an xml tree
        with open(self._path, "r") as file_handle:
            xmltree = etree.Element('jube')
            data = yaml.load(file_handle.read(), Loader=yaml.Loader)
            YAML_Converter.create_headtags(data, xmltree, self._include_path)
            xml = jube2.util.output.element_tree_tostring(
                xmltree, encoding="UTF-8")
            self._int_file.write(xml.encode('UTF-8'))
        LOGGER.debug("  YAML Conversion finalized")

[docs]    def read(self):
        """Read data of converted file"""
        return self._int_file.getvalue()

[docs]    def close(self):
        """Close converted file"""
        self._int_file.close()

    def __find_include_file(self, filename):
        """Search for filename in include-pathes and return resulting path"""
        for path in self._include_path:
            file_path = os.path.join(path, filename)
            if os.path.exists(file_path):
                break
        else:
            raise ValueError(("\"{0}\" not found in possible " +
                              "include pathes").format(filename))

        return file_path

    def __search_for_tags(self):
        """Search a YAML file for stored tag information"""
        tags = set()
        with open(self._path, "r") as file_handle:
            data = yaml.load(file_handle.read(), Loader=yaml.Loader)
            if "selection" in data and "tag" in data["selection"]:
                if type(data["selection"]["tag"]) is not list:
                    data["selection"]["tag"] = [data["selection"]["tag"]]
                for tag in data["selection"]["tag"]:
                    if not tag.startswith("!include "):
                        tags.update(
                            set(tag.split(jube2.conf.DEFAULT_SEPARATOR)))
        return tags

    def __search_for_include_pathes(self):
        """Search a YAML file for stored include-path information"""
        include_pathes = []
        with open(self._path, "r") as file_handle:
            data = yaml.load(file_handle.read(), Loader=yaml.Loader)
            # include-path is only allowed on the top level of the tree
            if "include-path" in data:
                if type(data["include-path"]) is not list:
                    data["include-path"] = [data["include-path"]]
                values = self.__search_for_pathes(data["include-path"])
                for val in values:
                    include_pathes.append(os.path.join(
                        os.path.dirname(self._path), val))
        return include_pathes

    def __search_for_pathes(self, data):
        """Search in given data for stored path informations"""
        paths = []
        for path in data:
            if type(path) is dict:
                if "tag" in path and not jube2.util.util.valid_tags(path["tag"], self._tags):
                    return
                value = path["path"] if "path" in path else path["_"]
                if type(value) is not list:
                    value = [value]
                path_val = self.__search_for_pathes(value)
                paths.extend(path_val)
            elif type(path) is list:
                path_val = self.__search_for_pathes(path)
                paths.extend(path_val)
            else:
                paths.append(path)
        return paths

    # adapted from
    # http://code.activestate.com/recipes/577613-yaml-include-support/
    def __yaml_include(self, loader, node):
        """ Constructor for the include tag"""
        yaml_node_data = node.value.split(":")
        try:
            file = self.__find_include_file(yaml_node_data[0])
            if os.path.normpath(file) == os.path.normpath(self._path):
                # Avoid recursive !include loops
                loader = yaml.BaseLoader
            else:
                loader = yaml.Loader
            with open(file) as inputfile:
                try:
                    _ = yaml.load(inputfile.read(), Loader=loader)
                except yaml.parser.ParserError:
                    LOGGER.error(("Including data from \"{0}\" into \"{1}\" " +
                                  "raised an error.").format(file, self._path))
                    raise
                inputfile.close()
                if len(yaml_node_data) > 1:
                    _ = eval("_" + yaml_node_data[1])
                    if len(yaml_node_data) > 2:
                        _ = eval(yaml_node_data[2])
                return _
        except ValueError as ve:
            if self._ignore_search_errors:
                return "!include {0}".format(node.value)
            else:
                raise ve

[docs]    @staticmethod
    def create_headtags(data, parent_node, include_pathes):
        """ Search for the headtags in given dictionary """
        if type(data) is not dict:
            data = {'benchmark': data}
        to_delete = list()
        for tag in data.keys():
            # Override include-path with parsed include-path
            if tag == "include-path":
                data[tag] = include_pathes
            if type(data[tag]) is not list:
                data[tag] = [data[tag]]
            # benchmark is optional on the top level, but if it is used only
            # a limited number of options are allowed on top level
            # (listed in "/benchmark")
            if "benchmark" in data and tag in YAML_Converter.allowed_tags[
                    "/benchmark"]:
                for attr_and_tags in data[tag]:
                    YAML_Converter.create_tag(tag, attr_and_tags, parent_node)
            elif "benchmark" not in data and \
                    tag in YAML_Converter.allowed_tags["/"]:
                if tag not in YAML_Converter.allowed_tags["benchmark"]:
                    for attr_and_tags in data[tag]:
                        YAML_Converter.create_tag(
                            tag, attr_and_tags, parent_node)
                    to_delete.append(tag)
        for tag in to_delete:
            del(data[tag])
        if "benchmark" not in data:
            YAML_Converter.create_tag("benchmark", data, parent_node)

[docs]    @staticmethod
    def create_tag(new_node_name, data, parent_node):
        """ Create the Subtag name, search for known tags
            and set the given attributes"""
        LOGGER.debug("    Create XML tag <{0}>".format(new_node_name))
        new_node = etree.SubElement(parent_node, new_node_name)
        # Check if tag can have subtags
        if new_node_name in YAML_Converter.allowed_tags and type(data) is dict:
            allowed_tags = YAML_Converter.allowed_tags[new_node_name]
            for key, value in data.items():
                if (type(value) is not list):
                    value = [value]
                for val in value:
                    if key in allowed_tags:
                        # Create new subtag
                        YAML_Converter.create_tag(key, val, new_node)
                    else:
                        # Create attribute
                        new_node.set(key, str(val) if val is not None else "")
        else:
            tag_value = ""
            if type(data) is not dict:
                # standard tag value
                tag_value = data if data is not None else ""
            else:
                for key, value in data.items():
                    if key == "_":
                        # _ represents the standard tag value
                        tag_value = value if value is not None else ""
                    else:
                        # Create attribute
                        new_node.set(key, str(value)
                                     if value is not None else "")
            if type(tag_value) is list:
                new_node.text = str(tag_value.pop(0))
                while len(tag_value) > 0:
                    new_node = copy.deepcopy(new_node)
                    parent_node.append(new_node)
                    new_node.text = str(tag_value.pop(0))
            else:
                new_node.text = str(tag_value)

[docs]    @staticmethod
    def is_parseable_yaml_file(filename):
        try:
            with open(filename, "r") as file_handle:
                if type(yaml.load(file_handle.read())) is str:
                    return False
                else:
                    return True
        except Exception as parseerror:
            return False