Source code for schema_parser.helpers

#  Copyright (c) 2022-2023.  OCX Consortium https://3docx.org. See the LICENSE

import re
from dataclasses import asdict
from typing import Dict
from typing import List
from typing import Union

from lxml.etree import Element
from lxml.etree import ElementTextIterator

from .data_classes import SchemaChange
from .xelement import LxmlElement


class SchemaHelper:
    """A utility class for retrieving OCX attributes and information from an OCX xsd element"""

    @classmethod
    def is_reference(cls, element: Element) -> bool:
        """Is a reference or not

        Args:
            element: The xsd element to inspect

        Returns:
            True if the element is a reference, False otherwise

        """
        # ``get_reference`` signals "no reference" with the literal string "None".
        return cls.get_reference(element) != "None"

    @classmethod
    def get_reference(cls, element: Element) -> Union[str, None]:
        """The element reference

        Args:
            element: The xsd element to inspect

        Returns:
            The value of the ``ref`` attribute, i.e. the reference to a global element
            (presumably on the form ``prefix:name``).
            If the element has no ``ref`` attribute, the *string* ``"None"`` is returned
            (not the ``None`` object); ``is_reference`` compares against this sentinel.

        """
        ref = LxmlElement.get_xml_attrib(element).get("ref")
        return "None" if ref is None else ref

    @staticmethod
    def _derivation_base(parent: Element, current: Union[str, None]) -> Union[str, None]:
        """Return the ``base`` of an ``extension``/``restriction`` found below ``parent``, else ``current``.

        Both tags are probed in that order, so a ``restriction`` overrides an ``extension``.
        """
        for tag in ("extension", "restriction"):
            derived = LxmlElement.find_all_children_with_name_and_attribute(parent, tag, "base")
            if len(derived) > 0:
                current = derived[0].get("base")
        return current

    @staticmethod
    def get_type(element: Element) -> Union[str, None]:
        """The element type given by the element attribute or by its ``complexContent``

        The type is resolved from several sources; a later source overrides an earlier one:

        1. the ``type``, ``base`` and ``ref`` attributes of the element (in that order),
        2. the ``base`` of an ``extension``/``restriction`` when the element has ``complexContent``,
        3. the ``base`` of an ``extension``/``restriction`` of the first ``simpleType`` found.

        Args:
            element: The xsd element to inspect

        Returns:
            The type exactly as written in the schema (presumably on the form ``prefix:name``).
            ``None`` is returned if no type information is found.

        """
        attributes = LxmlElement.get_xml_attrib(element)
        schema_type = None
        for key in ("type", "base", "ref"):
            if key in attributes:
                schema_type = attributes[key]
        # The element may have complexContent, which holds either an extension or a restriction.
        # The lookup starts at the element itself, not at the complexContent child.
        if len(LxmlElement.find_all_children_with_name(element, "complexContent")) > 0:
            schema_type = SchemaHelper._derivation_base(element, schema_type)
        # The element may be a simpleType; only the first one found is considered.
        simple_type = LxmlElement.find_all_children_with_name(element, "simpleType")
        if len(simple_type) > 0:
            schema_type = SchemaHelper._derivation_base(simple_type[0], schema_type)
        # NOTE: no namespace prefix is added to an unprefixed type and no "untyped"
        # placeholder is substituted here; callers must handle ``None`` themselves.
        return schema_type

    @staticmethod
    def unique_tag(name: str, namespace: str) -> str:
        """A unique global tag from the element name and namespace

        Args:
            name: The name of the element
            namespace: The namespace

        Returns:
            A unique element tag on the form ``{namespace}name``

        """
        return "{" + namespace + "}" + name

    @staticmethod
    def get_schema_version(root: Element) -> str:
        """Get the current OCX schema version

        Args:
            root: The root element of the schema

        Returns:
            The value of the ``fixed`` attribute of the first ``schemaVersion`` attribute
            declaration found, or the string ``"Missing"`` if no such declaration exists.

        """
        # Looks for an ``attribute`` element whose ``name`` is "schemaVersion".
        declarations = LxmlElement.find_all_children_with_attribute_value(
            root, "attribute", "name", "schemaVersion"
        )
        if len(declarations) > 0:
            # NOTE(review): presumably yields None when the declaration has no ``fixed`` value - confirm.
            return declarations[0].get("fixed")
        return "Missing"

    @staticmethod
    def find_schema_changes(root: Element) -> List[SchemaChange]:
        """Find any schema version changes with tag ``SchemaChange``

        Args:
            root: The root element of the schema

        Returns:
             A list of ``SchemaChange`` dataclasses, one per ``SchemaChange`` element found.
             The description is the text of the first ``Description`` element of the change
             with newlines, tabs and carriage returns removed, or an empty string if the
             change has no ``Description``.

        """
        schema_changes = []
        for change in LxmlElement.find_all_children_with_name(root, "SchemaChange"):
            # Retrieve the reason for change from the Description element
            descriptions = LxmlElement.find_all_children_with_name(change, "Description")
            description = ""
            if len(descriptions) > 0:
                # Read the text from the Description element that was found, not from
                # whatever happens to be the first child of the SchemaChange element.
                raw_text = "".join(ElementTextIterator(descriptions[0], with_tail=False))
                description = re.sub(r"[\n\t\r]", "", raw_text)
            schema_changes.append(
                SchemaChange(
                    change.get("version"),
                    change.get("author"),
                    change.get("date"),
                    description,
                )
            )
        return schema_changes

    @classmethod
    def schema_changes_data_grid(cls, root: Element) -> Dict:
        """A dictionary of the content of all ``SchemaChange`` tags

        Args:
            root: The root element of the schema

        Returns:
             A dict data grid, sorted by key. Each key is the zero-padded (five digits)
             position of the change and each value is the ``SchemaChange`` converted to a dict.

        """
        data_grid = {
            f"{index:05d}": asdict(change) for index, change in enumerate(cls.find_schema_changes(root))
        }
        return dict(sorted(data_grid.items()))