Source code for schema_parser.helpers

#  Copyright (c) 2022-2023.  OCX Consortium https://3docx.org. See the LICENSE

import re
from dataclasses import asdict
from typing import Dict
from typing import List
from typing import Union

from lxml.etree import Element
from lxml.etree import ElementTextIterator

from .data_classes import SchemaChange
from .xelement import LxmlElement


class SchemaHelper:
    """A utility class for retrieving OCX attributes and information from an OCX xsd element"""

    @classmethod
    def is_reference(cls, element: Element) -> bool:
        """Is a reference or not

        Args:
            element: The xsd element to inspect

        Returns:
            True if the element is a reference, False otherwise
        """
        # ``get_reference`` signals "no reference" with the literal string
        # "None" (not the ``None`` object), hence the string comparison.
        return cls.get_reference(element) != "None"

    @classmethod
    def get_reference(cls, element: Element) -> Union[str, None]:
        """The element reference

        Args:
            element: The xsd element to inspect

        Returns:
            The value of the element's ``ref`` attribute, i.e. the reference
            to a global element on the form ``prefix:name``. If the element
            has no ``ref`` attribute, the *string* ``"None"`` is returned
            (not the ``None`` object); ``is_reference`` relies on this.
        """
        attributes = LxmlElement.get_xml_attrib(element)
        ref = attributes.get("ref")
        if ref is None:
            ref = "None"
        return ref

    @staticmethod
    def _derived_base(element: Element, current: Union[str, None]) -> Union[str, None]:
        """Return the ``base`` of an extension/restriction found under ``element``.

        An ``extension`` is looked up first and a ``restriction`` second, so a
        restriction wins when both are found. ``current`` is returned
        unchanged when neither is found.
        """
        for derivation in ("extension", "restriction"):
            found = LxmlElement.find_all_children_with_name_and_attribute(element, derivation, "base")
            if len(found) > 0:
                current = found[0].get("base")
        return current

    @staticmethod
    def get_type(element: Element) -> Union[str, None]:
        """The element type given by the element attribute or by its ``complexContent``

        The type is resolved in increasing order of precedence from the
        element's ``type``, ``base`` and ``ref`` attributes, then from the
        ``base`` of an extension/restriction when the element has
        ``complexContent``, and finally from the ``base`` of an
        extension/restriction of its first ``simpleType``.

        Args:
            element: The xsd element to inspect

        Returns:
            The type as found in the schema (typically on the form
            ``prefix:name``). ``None`` is returned if no type could be
            determined.
        """
        schema_type = None
        attributes = LxmlElement.get_xml_attrib(element)
        # Later attributes deliberately override earlier ones.
        for attribute_name in ("type", "base", "ref"):
            if attribute_name in attributes:
                schema_type = attributes[attribute_name]
        # The element may have complexContent, which has either an extension
        # or a restriction carrying the base type.
        if len(LxmlElement.find_all_children_with_name(element, "complexContent")) > 0:
            schema_type = SchemaHelper._derived_base(element, schema_type)
        # The element may be a simpleType, which may also have either an
        # extension or a restriction.
        simple_type = LxmlElement.find_all_children_with_name(element, "simpleType")
        if len(simple_type) > 0:
            schema_type = SchemaHelper._derived_base(simple_type[0], schema_type)
        return schema_type

    @staticmethod
    def unique_tag(name: str, namespace: str) -> str:
        """A unique global tag from the element name and namespace

        Args:
            name: The name of the element
            namespace: The namespace

        Returns:
            A unique element tag on the form ``{namespace}name``
        """
        return "{" + namespace + "}" + name

    @staticmethod
    def get_schema_version(root: Element) -> str:
        """Get the current OCX schema version

        Args:
            root: The root element of the schema

        Returns:
            The ``fixed`` value of the first ``attribute`` element named
            ``schemaVersion``, or ``"Missing"`` if no such element exists or
            it carries no ``fixed`` value.
        """
        version = "Missing"
        element = LxmlElement.find_all_children_with_attribute_value(root, "attribute", "name", "schemaVersion")
        if len(element) > 0:
            # Fall back to the sentinel instead of leaking ``None`` when the
            # attribute declaration has no ``fixed`` value.
            version = element[0].get("fixed", version)
        return version

    @staticmethod
    def find_schema_changes(root: Element) -> List[SchemaChange]:
        """Find any schema version changes with tag ``SchemaChange``

        Args:
            root: The root element of the schema

        Returns:
            A list of ``SchemaChange`` dataclasses, one per ``SchemaChange``
            element, built from its ``version``, ``author`` and ``date``
            attributes and the text of its first ``Description`` element
            (newlines, tabs and carriage returns removed). The description is
            an empty string when no ``Description`` element is found.
        """
        schema_changes = []
        for change in LxmlElement.find_all_children_with_name(root, "SchemaChange"):
            # Retrieve the reason for change from the Description element
            description = ""
            descriptions = LxmlElement.find_all_children_with_name(change, "Description")
            if len(descriptions) > 0:
                # Read the text of the Description element itself rather than
                # of whichever child happens to come first in the change.
                raw_text = "".join(ElementTextIterator(descriptions[0], with_tail=False))
                description = re.sub(r"[\n\t\r]", "", raw_text)
            schema_changes.append(
                SchemaChange(
                    change.get("version"),
                    change.get("author"),
                    change.get("date"),
                    description,
                )
            )
        return schema_changes

    @classmethod
    def schema_changes_data_grid(cls, root: Element) -> Dict:
        """A dictionary of the content of all ``SchemaChange`` tags

        Args:
            root: The root element of the schema

        Returns:
            A data grid dict mapping a unique, zero-padded five-digit running
            index (``"00000"``, ``"00001"``, ...) to the ``asdict`` form of
            each ``SchemaChange``, in the order the changes were found.
        """
        return {f"{index:05d}": asdict(change) for index, change in enumerate(cls.find_schema_changes(root))}