Files
api/path/to/venv/lib/python3.12/site-packages/mammoth/docx/xmlparser.py
2025-12-30 11:27:14 +07:00

122 lines
3.1 KiB
Python

import xml.dom.minidom
import cobble
@cobble.data
class XmlElement(object):
    """An XML element node: a name, its attributes and its child nodes.

    Instances are created positionally as
    ``XmlElement(name, attributes, children)``.
    """

    # Element name as a string.
    name = cobble.field()
    # Attribute values keyed by attribute name (a dict when built by this
    # module's helpers).
    attributes = cobble.field()
    # Child nodes in document order; both element and text nodes may appear.
    children = cobble.field()

    def find_child_or_null(self, name):
        """Return the first child element called ``name``.

        Falls back to the module-level ``null_xml_element`` placeholder when
        there is no such child, so lookups can be chained without ``None``
        checks.
        """
        return self.find_child(name) or null_xml_element

    def find_child(self, name):
        """Return the first child element called ``name``, or ``None``."""
        for child in self.children:
            if isinstance(child, XmlElement) and child.name == name:
                return child
        return None

    def find_children(self, name):
        """Return an ``XmlElementList`` of all child elements called ``name``."""
        # Build a real list rather than handing over a lazy ``filter`` object:
        # a filter object is exhausted after one pass, which would make the
        # returned collection silently empty on a second iteration.
        return XmlElementList([
            child
            for child in self.children
            if child.node_type == node_types.element and child.name == name
        ])
class XmlElementList(object):
    """A re-iterable collection of elements with a batched child lookup."""

    def __init__(self, elements):
        """Store ``elements``, which may be any iterable of elements."""
        # Take a snapshot so that one-shot iterables (generators, ``filter``
        # objects, ...) can still be iterated any number of times afterwards.
        self._elements = list(elements)

    def __iter__(self):
        """Iterate over the stored elements in their original order."""
        return iter(self._elements)

    def find_children(self, name):
        """Collect ``find_children(name)`` from every stored element.

        Results are concatenated in order and returned as a new
        ``XmlElementList``.
        """
        children = []
        for element in self._elements:
            children.extend(element.find_children(name))
        return XmlElementList(children)
class NullXmlElement(object):
    """Placeholder element used when a child lookup finds nothing.

    It exposes empty ``attributes`` and ``children`` and answers child
    lookups with itself or ``None``, so chained ``find_child_or_null`` calls
    keep working without ``None`` checks.
    """

    # Defined on the class, so these containers are shared by all instances.
    attributes = dict()
    children = list()

    def find_child(self, name):
        """Always report that there is no child called ``name``."""
        return None

    def find_child_or_null(self, name):
        """Return this placeholder itself, whatever ``name`` is."""
        return self


# Module-wide placeholder instance.
null_xml_element = NullXmlElement()
@cobble.data
class XmlText(object):
    """A text node; created positionally as ``XmlText(value)``."""

    # The node's text content.
    value = cobble.field()
def element(name, attributes=None, children=None):
    """Build an ``XmlElement``, filling in empty containers where needed.

    A falsy ``attributes`` (including ``None``) is replaced by a new ``{}``
    and a falsy ``children`` by a new ``[]``.
    """
    if not attributes:
        attributes = {}
    if not children:
        children = []
    return XmlElement(name, attributes, children)


# Companion factory for text nodes: ``text(value)`` builds an ``XmlText``.
text = XmlText
class node_types(object):
    """Integer tags identifying the kind of a node."""

    element, text = 1, 3


# Tag both node classes so code can branch on ``node.node_type`` instead of
# using ``isinstance``.
XmlText.node_type = node_types.text
XmlElement.node_type = node_types.element
def parse_xml(fileobj, namespace_mapping=None):
    """Parse an XML document into a tree of ``XmlElement``/``XmlText`` nodes.

    Args:
        fileobj: The XML source, passed unchanged to
            ``xml.dom.minidom.parse``.
        namespace_mapping: Optional iterable of ``(prefix, uri)`` pairs.
            Names in a mapped namespace are rendered as ``"prefix:local"``.

    Returns:
        The ``XmlElement`` converted from the document's root element.

    Naming rules for elements and attributes:
        * no namespace            -> ``"local"``
        * namespace with a prefix -> ``"prefix:local"``
        * any other namespace     -> ``"{uri}local"``

    Namespace declarations (``xmlns`` / ``xmlns:*`` attributes) are not
    included in the converted attributes. Nodes other than elements, text
    and CDATA sections (comments, processing instructions, ...) are dropped.
    """
    if namespace_mapping is None:
        namespace_prefixes = {}
    else:
        # Invert the mapping: lookups below go from namespace URI to prefix.
        namespace_prefixes = {uri: prefix for prefix, uri in namespace_mapping}

    # CDATA sections carry character data just like plain text nodes, so
    # both kinds become XmlText; otherwise CDATA content would be lost.
    text_node_types = (
        xml.dom.Node.TEXT_NODE,
        xml.dom.Node.CDATA_SECTION_NODE,
    )

    def convert_node(node):
        if node.nodeType == xml.dom.Node.ELEMENT_NODE:
            return convert_element(node)
        elif node.nodeType in text_node_types:
            return XmlText(node.nodeValue)
        else:
            return None

    def convert_element(element):
        converted_name = convert_name(element)

        converted_attributes = {
            convert_name(attribute): attribute.value
            for attribute in element.attributes.values()
            # Skip namespace declarations; they are not real attributes.
            if attribute.namespaceURI != "http://www.w3.org/2000/xmlns/"
        }

        converted_children = []
        for child_node in element.childNodes:
            converted_child_node = convert_node(child_node)
            if converted_child_node is not None:
                converted_children.append(converted_child_node)

        return XmlElement(converted_name, converted_attributes, converted_children)

    def convert_name(node):
        if node.namespaceURI is None:
            return node.localName

        prefix = namespace_prefixes.get(node.namespaceURI)
        if prefix is None:
            return "{%s}%s" % (node.namespaceURI, node.localName)
        return "%s:%s" % (prefix, node.localName)

    # NOTE(review): xml.dom.minidom does not protect against maliciously
    # constructed XML (e.g. entity-expansion attacks). If ``fileobj`` can
    # come from an untrusted source, consider a hardened parser.
    #
    # Using the document as a context manager unlinks the DOM tree once the
    # conversion is done, so its reference cycles are released promptly. The
    # converted tree holds only plain strings, never DOM nodes.
    with xml.dom.minidom.parse(fileobj) as document:
        return convert_node(document.documentElement)