Initial commit (Clean history)

This commit is contained in:
anhduy-tech
2025-12-30 11:27:14 +07:00
commit ef48c93de0
19255 changed files with 3248867 additions and 0 deletions

View File

@@ -0,0 +1,251 @@
# ruff: noqa: E402, I001
"""Initializes oxml sub-package.
This includes registering custom element classes corresponding to Open XML elements.
"""
from __future__ import annotations
from docx.oxml.drawing import CT_Drawing
from docx.oxml.parser import OxmlElement, parse_xml, register_element_cls
from docx.oxml.shape import (
CT_Anchor,
CT_Blip,
CT_BlipFillProperties,
CT_GraphicalObject,
CT_GraphicalObjectData,
CT_Inline,
CT_NonVisualDrawingProps,
CT_Picture,
CT_PictureNonVisual,
CT_Point2D,
CT_PositiveSize2D,
CT_ShapeProperties,
CT_Transform2D,
)
from docx.oxml.shared import CT_DecimalNumber, CT_OnOff, CT_String
from docx.oxml.text.hyperlink import CT_Hyperlink
from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
from docx.oxml.text.run import (
CT_R,
CT_Br,
CT_Cr,
CT_NoBreakHyphen,
CT_PTab,
CT_Text,
)
# -- `OxmlElement` and `parse_xml()` are not used in this module but several downstream
# -- "extension" packages expect to find them here and there's no compelling reason
# -- not to republish them here so those keep working.
__all__ = ["OxmlElement", "parse_xml"]
# ---------------------------------------------------------------------------
# DrawingML-related elements
# -- each call below maps a namespace-prefixed tag to the custom element class the
# -- oxml parser uses for that tag; one class may serve several tags (e.g. both
# -- `a:ext` and `wp:extent` map to `CT_PositiveSize2D`).
register_element_cls("a:blip", CT_Blip)
register_element_cls("a:ext", CT_PositiveSize2D)
register_element_cls("a:graphic", CT_GraphicalObject)
register_element_cls("a:graphicData", CT_GraphicalObjectData)
register_element_cls("a:off", CT_Point2D)
register_element_cls("a:xfrm", CT_Transform2D)
register_element_cls("pic:blipFill", CT_BlipFillProperties)
register_element_cls("pic:cNvPr", CT_NonVisualDrawingProps)
register_element_cls("pic:nvPicPr", CT_PictureNonVisual)
register_element_cls("pic:pic", CT_Picture)
register_element_cls("pic:spPr", CT_ShapeProperties)
register_element_cls("w:drawing", CT_Drawing)
register_element_cls("wp:anchor", CT_Anchor)
register_element_cls("wp:docPr", CT_NonVisualDrawingProps)
register_element_cls("wp:extent", CT_PositiveSize2D)
register_element_cls("wp:inline", CT_Inline)
# ---------------------------------------------------------------------------
# hyperlink-related elements
register_element_cls("w:hyperlink", CT_Hyperlink)
# ---------------------------------------------------------------------------
# text-related elements
# -- run (`w:r`) and the run-content elements imported from `docx.oxml.text.run` and
# -- `docx.oxml.text.pagebreak` above --
register_element_cls("w:br", CT_Br)
register_element_cls("w:cr", CT_Cr)
register_element_cls("w:lastRenderedPageBreak", CT_LastRenderedPageBreak)
register_element_cls("w:noBreakHyphen", CT_NoBreakHyphen)
register_element_cls("w:ptab", CT_PTab)
register_element_cls("w:r", CT_R)
register_element_cls("w:t", CT_Text)
# ---------------------------------------------------------------------------
# header/footer-related mappings
# -- both are simple flag elements, so they share the generic `CT_OnOff` class --
register_element_cls("w:evenAndOddHeaders", CT_OnOff)
register_element_cls("w:titlePg", CT_OnOff)
# ---------------------------------------------------------------------------
# other custom element class mappings
# -- from here on each sub-module is imported immediately before its classes are
# -- registered; the late imports are permitted by the `noqa: E402` directive at the
# -- top of this module.

# -- comments part --
from .comments import CT_Comments, CT_Comment
register_element_cls("w:comments", CT_Comments)
register_element_cls("w:comment", CT_Comment)
# -- core-properties part --
from .coreprops import CT_CoreProperties
register_element_cls("cp:coreProperties", CT_CoreProperties)
# -- main document part --
from .document import CT_Body, CT_Document
register_element_cls("w:body", CT_Body)
register_element_cls("w:document", CT_Document)
# -- numbering part; plain integer-valued children reuse `CT_DecimalNumber` --
from .numbering import CT_Num, CT_Numbering, CT_NumLvl, CT_NumPr
register_element_cls("w:abstractNumId", CT_DecimalNumber)
register_element_cls("w:ilvl", CT_DecimalNumber)
register_element_cls("w:lvlOverride", CT_NumLvl)
register_element_cls("w:num", CT_Num)
register_element_cls("w:numId", CT_DecimalNumber)
register_element_cls("w:numPr", CT_NumPr)
register_element_cls("w:numbering", CT_Numbering)
register_element_cls("w:startOverride", CT_DecimalNumber)
from .section import (
CT_HdrFtr,
CT_HdrFtrRef,
CT_PageMar,
CT_PageSz,
CT_SectPr,
CT_SectType,
)
# -- section-related elements; header and footer variants of a tag share one class
# -- (`CT_HdrFtr` for `w:hdr`/`w:ftr`, `CT_HdrFtrRef` for the two reference tags) --
register_element_cls("w:footerReference", CT_HdrFtrRef)
register_element_cls("w:ftr", CT_HdrFtr)
register_element_cls("w:hdr", CT_HdrFtr)
register_element_cls("w:headerReference", CT_HdrFtrRef)
register_element_cls("w:pgMar", CT_PageMar)
register_element_cls("w:pgSz", CT_PageSz)
register_element_cls("w:sectPr", CT_SectPr)
register_element_cls("w:type", CT_SectType)
# -- settings part --
from .settings import CT_Settings
register_element_cls("w:settings", CT_Settings)
# -- styles part; flag, string and integer children reuse the shared `CT_OnOff`,
# -- `CT_String` and `CT_DecimalNumber` classes --
from .styles import CT_LatentStyles, CT_LsdException, CT_Style, CT_Styles
register_element_cls("w:basedOn", CT_String)
register_element_cls("w:latentStyles", CT_LatentStyles)
register_element_cls("w:locked", CT_OnOff)
register_element_cls("w:lsdException", CT_LsdException)
register_element_cls("w:name", CT_String)
register_element_cls("w:next", CT_String)
register_element_cls("w:qFormat", CT_OnOff)
register_element_cls("w:semiHidden", CT_OnOff)
register_element_cls("w:style", CT_Style)
register_element_cls("w:styles", CT_Styles)
register_element_cls("w:uiPriority", CT_DecimalNumber)
register_element_cls("w:unhideWhenUsed", CT_OnOff)
from .table import (
CT_Height,
CT_Row,
CT_Tbl,
CT_TblGrid,
CT_TblGridCol,
CT_TblLayoutType,
CT_TblPr,
CT_TblPrEx,
CT_TblWidth,
CT_Tc,
CT_TcPr,
CT_TrPr,
CT_VMerge,
CT_VerticalJc,
)
# -- table-related elements, using the classes imported from `.table` just above
# -- plus the shared `CT_OnOff`, `CT_DecimalNumber` and `CT_String` classes --
register_element_cls("w:bidiVisual", CT_OnOff)
register_element_cls("w:gridAfter", CT_DecimalNumber)
register_element_cls("w:gridBefore", CT_DecimalNumber)
register_element_cls("w:gridCol", CT_TblGridCol)
register_element_cls("w:gridSpan", CT_DecimalNumber)
register_element_cls("w:tbl", CT_Tbl)
register_element_cls("w:tblGrid", CT_TblGrid)
register_element_cls("w:tblLayout", CT_TblLayoutType)
register_element_cls("w:tblPr", CT_TblPr)
register_element_cls("w:tblPrEx", CT_TblPrEx)
register_element_cls("w:tblStyle", CT_String)
register_element_cls("w:tc", CT_Tc)
register_element_cls("w:tcPr", CT_TcPr)
register_element_cls("w:tcW", CT_TblWidth)
register_element_cls("w:tr", CT_Row)
register_element_cls("w:trHeight", CT_Height)
register_element_cls("w:trPr", CT_TrPr)
register_element_cls("w:vAlign", CT_VerticalJc)
register_element_cls("w:vMerge", CT_VMerge)
from .text.font import (
CT_Color,
CT_Fonts,
CT_Highlight,
CT_HpsMeasure,
CT_RPr,
CT_Underline,
CT_VerticalAlignRun,
)
# -- run-property (font) elements, using the classes imported from `.text.font` just
# -- above; the many on/off toggles all share `CT_OnOff` --
register_element_cls("w:b", CT_OnOff)
register_element_cls("w:bCs", CT_OnOff)
register_element_cls("w:caps", CT_OnOff)
register_element_cls("w:color", CT_Color)
register_element_cls("w:cs", CT_OnOff)
register_element_cls("w:dstrike", CT_OnOff)
register_element_cls("w:emboss", CT_OnOff)
register_element_cls("w:highlight", CT_Highlight)
register_element_cls("w:i", CT_OnOff)
register_element_cls("w:iCs", CT_OnOff)
register_element_cls("w:imprint", CT_OnOff)
register_element_cls("w:noProof", CT_OnOff)
register_element_cls("w:oMath", CT_OnOff)
register_element_cls("w:outline", CT_OnOff)
register_element_cls("w:rFonts", CT_Fonts)
register_element_cls("w:rPr", CT_RPr)
register_element_cls("w:rStyle", CT_String)
register_element_cls("w:rtl", CT_OnOff)
register_element_cls("w:shadow", CT_OnOff)
register_element_cls("w:smallCaps", CT_OnOff)
register_element_cls("w:snapToGrid", CT_OnOff)
register_element_cls("w:specVanish", CT_OnOff)
register_element_cls("w:strike", CT_OnOff)
register_element_cls("w:sz", CT_HpsMeasure)
register_element_cls("w:u", CT_Underline)
register_element_cls("w:vanish", CT_OnOff)
register_element_cls("w:vertAlign", CT_VerticalAlignRun)
register_element_cls("w:webHidden", CT_OnOff)
# -- paragraph element --
from .text.paragraph import CT_P
register_element_cls("w:p", CT_P)
from .text.parfmt import (
CT_Ind,
CT_Jc,
CT_PPr,
CT_Spacing,
CT_TabStop,
CT_TabStops,
)
# -- paragraph-formatting elements, using the classes imported from `.text.parfmt`
# -- just above plus the shared `CT_OnOff`, `CT_DecimalNumber` and `CT_String` --
register_element_cls("w:ind", CT_Ind)
register_element_cls("w:jc", CT_Jc)
register_element_cls("w:keepLines", CT_OnOff)
register_element_cls("w:keepNext", CT_OnOff)
register_element_cls("w:outlineLvl", CT_DecimalNumber)
register_element_cls("w:pageBreakBefore", CT_OnOff)
register_element_cls("w:pPr", CT_PPr)
register_element_cls("w:pStyle", CT_String)
register_element_cls("w:spacing", CT_Spacing)
register_element_cls("w:tab", CT_TabStop)
register_element_cls("w:tabs", CT_TabStops)
register_element_cls("w:widowControl", CT_OnOff)

View File

@@ -0,0 +1,124 @@
"""Custom element classes related to document comments."""
from __future__ import annotations
import datetime as dt
from typing import TYPE_CHECKING, Callable, cast
from docx.oxml.ns import nsdecls
from docx.oxml.parser import parse_xml
from docx.oxml.simpletypes import ST_DateTime, ST_DecimalNumber, ST_String
from docx.oxml.xmlchemy import BaseOxmlElement, OptionalAttribute, RequiredAttribute, ZeroOrMore
if TYPE_CHECKING:
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
class CT_Comments(BaseOxmlElement):
    """`w:comments` element, the root element of the comments part.

    Holds a collection of `w:comment` children, one per comment. Each child carries a
    unique `w:id` attribute by which the document text refers to it. The position of a
    comment within the collection is arbitrary; the collection is essentially a _set_
    that happens to be stored as a list.
    """

    # -- type-declarations to fill in the gaps for metaclass-added methods --
    comment_lst: list[CT_Comment]

    comment = ZeroOrMore("w:comment")

    def add_comment(self) -> CT_Comment:
        """Append a new, minimal `w:comment` child and return it.

        The new element gets a `w:id` not used by any existing comment and the required
        `w:author` attribute set to the empty string. Its content is a single paragraph
        holding one run with the annotation reference and no text. Callers add content
        by adding runs to that first paragraph and further paragraphs as needed.
        """
        next_id = self._next_available_comment_id()
        comment_xml = (
            f'<w:comment {nsdecls("w")} w:id="{next_id}" w:author="">'
            f" <w:p>"
            f" <w:pPr>"
            f' <w:pStyle w:val="CommentText"/>'
            f" </w:pPr>"
            f" <w:r>"
            f" <w:rPr>"
            f' <w:rStyle w:val="CommentReference"/>'
            f" </w:rPr>"
            f" <w:annotationRef/>"
            f" </w:r>"
            f" </w:p>"
            f"</w:comment>"
        )
        new_comment = cast(CT_Comment, parse_xml(comment_xml))
        self.append(new_comment)
        return new_comment

    def get_comment_by_id(self, comment_id: int) -> CT_Comment | None:
        """Return the first `w:comment` child whose `w:id` is `comment_id`, else |None|."""
        matches = self.xpath(f"(./w:comment[@w:id='{comment_id}'])[1]")
        if not matches:
            return None
        return matches[0]

    def _next_available_comment_id(self) -> int:
        """Return an id not used by any existing `w:comment` child.

        The usual answer is `max() + 1` over the ids in use (0 when there are none). When
        that value would not fit in a 32-bit signed integer, the first unused integer
        counting up from 0 is returned instead.
        """
        used_ids = list(map(int, self.xpath("./w:comment/@w:id")))
        candidate = max(used_ids, default=-1) + 1
        if candidate > 2**31 - 1:
            # -- fall back to scanning the sorted ids for the first gap --
            for position, used_id in enumerate(sorted(used_ids)):
                if position != used_id:
                    return position
            return len(used_ids)
        return candidate
class CT_Comment(BaseOxmlElement):
    """`w:comment` element, representing a single comment.

    A comment is a so-called "story" and can contain paragraphs and tables much like a table-cell.
    While probably most often used for a single sentence or phrase, a comment can contain rich
    content, including multiple rich-text paragraphs, hyperlinks, images, and tables.
    """

    # -- attributes on `w:comment`; `w:id` and `w:author` are declared required,
    # -- `w:initials` and `w:date` optional (annotated `| None`) --
    id: int = RequiredAttribute("w:id", ST_DecimalNumber)  # pyright: ignore[reportAssignmentType]
    author: str = RequiredAttribute("w:author", ST_String)  # pyright: ignore[reportAssignmentType]
    initials: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:initials", ST_String
    )
    date: dt.datetime | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:date", ST_DateTime
    )

    # -- children --
    p = ZeroOrMore("w:p", successors=())
    tbl = ZeroOrMore("w:tbl", successors=())

    # -- type-declarations for methods added by metaclass --
    add_p: Callable[[], CT_P]
    p_lst: list[CT_P]
    tbl_lst: list[CT_Tbl]
    _insert_tbl: Callable[[CT_Tbl], CT_Tbl]

    @property
    def inner_content_elements(self) -> list[CT_P | CT_Tbl]:
        """All `w:p` and `w:tbl` elements that are direct children of this comment.

        The result is whatever the XPath union `./w:p | ./w:tbl` selects, returned as a
        list; paragraphs or tables nested inside some other child are not included.
        """
        return self.xpath("./w:p | ./w:tbl")

View File

@@ -0,0 +1,298 @@
"""Custom element classes for core properties-related XML elements."""
from __future__ import annotations
import datetime as dt
import re
from typing import TYPE_CHECKING, Any, Callable, cast
from docx.oxml.ns import nsdecls, qn
from docx.oxml.parser import parse_xml
from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrOne
if TYPE_CHECKING:
from lxml.etree import _Element as etree_Element # pyright: ignore[reportPrivateUsage]
class CT_CoreProperties(BaseOxmlElement):
    """`<cp:coreProperties>` element, the root element of the Core Properties part.

    Stored as `/docProps/core.xml`. Implements many of the Dublin Core document metadata
    elements. String elements resolve to an empty string ("") if the element is not
    present in the XML. String elements are limited in length to 255 unicode characters.

    Date/time elements are read as timezone-aware UTC |datetime| values and are written
    as UTC with a trailing "Z".
    """

    # -- declared for type-checkers only; the method is not defined in this class body --
    get_or_add_revision: Callable[[], etree_Element]

    category = ZeroOrOne("cp:category", successors=())
    contentStatus = ZeroOrOne("cp:contentStatus", successors=())
    created = ZeroOrOne("dcterms:created", successors=())
    creator = ZeroOrOne("dc:creator", successors=())
    description = ZeroOrOne("dc:description", successors=())
    identifier = ZeroOrOne("dc:identifier", successors=())
    keywords = ZeroOrOne("cp:keywords", successors=())
    language = ZeroOrOne("dc:language", successors=())
    lastModifiedBy = ZeroOrOne("cp:lastModifiedBy", successors=())
    lastPrinted = ZeroOrOne("cp:lastPrinted", successors=())
    modified = ZeroOrOne("dcterms:modified", successors=())
    revision: etree_Element | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "cp:revision", successors=()
    )
    subject = ZeroOrOne("dc:subject", successors=())
    title = ZeroOrOne("dc:title", successors=())
    version = ZeroOrOne("cp:version", successors=())

    _coreProperties_tmpl = "<cp:coreProperties %s/>\n" % nsdecls("cp", "dc", "dcterms")

    @classmethod
    def new(cls) -> CT_CoreProperties:
        """Return a new `<cp:coreProperties>` element."""
        xml = cls._coreProperties_tmpl
        coreProperties = cast(CT_CoreProperties, parse_xml(xml))
        return coreProperties

    @property
    def author_text(self) -> str:
        """The text in the `dc:creator` child element."""
        return self._text_of_element("creator")

    @author_text.setter
    def author_text(self, value: str):
        self._set_element_text("creator", value)

    @property
    def category_text(self) -> str:
        """The text in the `cp:category` child element."""
        return self._text_of_element("category")

    @category_text.setter
    def category_text(self, value: str):
        self._set_element_text("category", value)

    @property
    def comments_text(self) -> str:
        """The text in the `dc:description` child element."""
        return self._text_of_element("description")

    @comments_text.setter
    def comments_text(self, value: str):
        self._set_element_text("description", value)

    @property
    def contentStatus_text(self) -> str:
        """The text in the `cp:contentStatus` child element."""
        return self._text_of_element("contentStatus")

    @contentStatus_text.setter
    def contentStatus_text(self, value: str):
        self._set_element_text("contentStatus", value)

    @property
    def created_datetime(self) -> dt.datetime | None:
        """The `dcterms:created` value as a UTC |datetime|, |None| if absent or invalid."""
        return self._datetime_of_element("created")

    @created_datetime.setter
    def created_datetime(self, value: dt.datetime):
        self._set_element_datetime("created", value)

    @property
    def identifier_text(self) -> str:
        """The text in the `dc:identifier` child element."""
        return self._text_of_element("identifier")

    @identifier_text.setter
    def identifier_text(self, value: str):
        self._set_element_text("identifier", value)

    @property
    def keywords_text(self) -> str:
        """The text in the `cp:keywords` child element."""
        return self._text_of_element("keywords")

    @keywords_text.setter
    def keywords_text(self, value: str):
        self._set_element_text("keywords", value)

    @property
    def language_text(self) -> str:
        """The text in the `dc:language` child element."""
        return self._text_of_element("language")

    @language_text.setter
    def language_text(self, value: str):
        self._set_element_text("language", value)

    @property
    def lastModifiedBy_text(self) -> str:
        """The text in the `cp:lastModifiedBy` child element."""
        return self._text_of_element("lastModifiedBy")

    @lastModifiedBy_text.setter
    def lastModifiedBy_text(self, value: str):
        self._set_element_text("lastModifiedBy", value)

    @property
    def lastPrinted_datetime(self) -> dt.datetime | None:
        """The `cp:lastPrinted` value as a UTC |datetime|, |None| if absent or invalid."""
        return self._datetime_of_element("lastPrinted")

    @lastPrinted_datetime.setter
    def lastPrinted_datetime(self, value: dt.datetime):
        self._set_element_datetime("lastPrinted", value)

    @property
    def modified_datetime(self) -> dt.datetime | None:
        """The `dcterms:modified` value as a UTC |datetime|, |None| if absent or invalid."""
        return self._datetime_of_element("modified")

    @modified_datetime.setter
    def modified_datetime(self, value: dt.datetime):
        self._set_element_datetime("modified", value)

    @property
    def revision_number(self) -> int:
        """Integer value of revision property.

        Resolves to 0 when the `cp:revision` element is absent, when its text is not an
        integer, or when the integer is negative.
        """
        revision_elm = self.revision
        if revision_elm is None:
            return 0
        try:
            revision = int(str(revision_elm.text))
        except ValueError:
            # -- non-integer revision strings also resolve to 0 --
            return 0
        # -- as do negative integers --
        return max(revision, 0)

    @revision_number.setter
    def revision_number(self, value: int):
        """Set revision property to string value of integer `value`.

        Raises |ValueError| when `value` is not an int or is less than 1.
        """
        if not isinstance(value, int) or value < 1:  # pyright: ignore[reportUnnecessaryIsInstance]
            tmpl = "revision property requires positive int, got '%s'"
            raise ValueError(tmpl % value)
        revision = self.get_or_add_revision()
        revision.text = str(value)

    @property
    def subject_text(self) -> str:
        """The text in the `dc:subject` child element."""
        return self._text_of_element("subject")

    @subject_text.setter
    def subject_text(self, value: str):
        self._set_element_text("subject", value)

    @property
    def title_text(self) -> str:
        """The text in the `dc:title` child element."""
        return self._text_of_element("title")

    @title_text.setter
    def title_text(self, value: str):
        self._set_element_text("title", value)

    @property
    def version_text(self) -> str:
        """The text in the `cp:version` child element."""
        return self._text_of_element("version")

    @version_text.setter
    def version_text(self, value: str):
        self._set_element_text("version", value)

    def _datetime_of_element(self, property_name: str) -> dt.datetime | None:
        """Return the date/time in child `property_name` as a UTC |datetime|.

        |None| is returned when the child element is absent, is present but empty, or
        contains text that cannot be parsed as a W3CDTF date/time.
        """
        element = getattr(self, property_name)
        if element is None:
            return None
        datetime_str = element.text
        if datetime_str is None:
            # -- element present but empty, e.g. `<dcterms:created/>`; slicing |None|
            # -- below would raise TypeError, which the handler does not catch --
            return None
        try:
            return self._parse_W3CDTF_to_datetime(datetime_str)
        except ValueError:
            # invalid datetime strings are ignored
            return None

    def _get_or_add(self, prop_name: str) -> BaseOxmlElement:
        """Return element returned by "get_or_add_" method for `prop_name`."""
        get_or_add_method_name = "get_or_add_%s" % prop_name
        get_or_add_method = getattr(self, get_or_add_method_name)
        element = get_or_add_method()
        return element

    @classmethod
    def _offset_dt(cls, dt_: dt.datetime, offset_str: str) -> dt.datetime:
        """A |datetime| instance offset from `dt_` by timezone offset in `offset_str`.

        `offset_str` is like `"-07:00"`. The offset is *removed* from `dt_`, i.e. a
        "+hh:mm" offset is subtracted and a "-hh:mm" offset is added, which converts a
        local time carrying that offset to UTC. Raises |ValueError| when `offset_str`
        does not start with a sign followed by `hh:mm`.
        """
        match = cls._offset_pattern.match(offset_str)
        if match is None:
            raise ValueError("'%s' is not a valid offset string" % offset_str)
        sign, hours_str, minutes_str = match.groups()
        sign_factor = -1 if sign == "+" else 1
        hours = int(hours_str) * sign_factor
        minutes = int(minutes_str) * sign_factor
        td = dt.timedelta(hours=hours, minutes=minutes)
        return dt_ + td

    _offset_pattern = re.compile(r"([+-])(\d\d):(\d\d)")

    @classmethod
    def _parse_W3CDTF_to_datetime(cls, w3cdtf_str: str) -> dt.datetime:
        """Return the timezone-aware UTC |datetime| represented by `w3cdtf_str`.

        Raises |ValueError| when `w3cdtf_str` matches none of the supported forms.
        """
        # valid W3CDTF date cases:
        # yyyy e.g. "2003"
        # yyyy-mm e.g. "2003-12"
        # yyyy-mm-dd e.g. "2003-12-31"
        # UTC timezone e.g. "2003-12-31T10:14:55Z"
        # numeric timezone e.g. "2003-12-31T10:14:55-08:00"
        templates = (
            "%Y-%m-%dT%H:%M:%S",
            "%Y-%m-%d",
            "%Y-%m",
            "%Y",
        )
        # strptime isn't smart enough to parse literal timezone offsets like
        # "-07:30", so we have to do it ourselves
        parseable_part = w3cdtf_str[:19]
        offset_str = w3cdtf_str[19:]
        dt_ = None
        for tmpl in templates:
            try:
                dt_ = dt.datetime.strptime(parseable_part, tmpl)
            except ValueError:
                continue
            # -- the templates differ in their number of "-" separators so at most
            # -- one can match; stop at the first success --
            break
        if dt_ is None:
            tmpl = "could not parse W3CDTF datetime string '%s'"
            raise ValueError(tmpl % w3cdtf_str)
        # -- only a six-character suffix like "-08:00" is treated as an offset; "Z"
        # -- or no suffix leaves the parsed value as-is --
        if len(offset_str) == 6:
            dt_ = cls._offset_dt(dt_, offset_str)
        return dt_.replace(tzinfo=dt.timezone.utc)

    def _set_element_datetime(self, prop_name: str, value: dt.datetime) -> None:
        """Set date/time value of child element having `prop_name` to `value`.

        A timezone-aware `value` is converted to UTC before it is written; a naive
        `value` is written unchanged. Raises |ValueError| when `value` is not a
        |datetime|.
        """
        if not isinstance(value, dt.datetime):  # pyright: ignore[reportUnnecessaryIsInstance]
            tmpl = "property requires <type 'datetime.datetime'> object, got %s"
            raise ValueError(tmpl % type(value))
        if value.utcoffset() is not None:
            # -- the text written below always ends in "Z" (UTC), so an aware value
            # -- in another timezone must be shifted to UTC first or the stored
            # -- instant would be wrong --
            value = value.astimezone(dt.timezone.utc)
        element = self._get_or_add(prop_name)
        dt_str = value.strftime("%Y-%m-%dT%H:%M:%SZ")
        element.text = dt_str
        if prop_name in ("created", "modified"):
            # These two require an explicit "xsi:type="dcterms:W3CDTF""
            # attribute. The first and last line are a hack required to add
            # the xsi namespace to the root element rather than each child
            # element in which it is referenced
            self.set(qn("xsi:foo"), "bar")
            element.set(qn("xsi:type"), "dcterms:W3CDTF")
            del self.attrib[qn("xsi:foo")]

    def _set_element_text(self, prop_name: str, value: Any) -> None:
        """Set string value of `prop_name` property to `value`.

        A non-str `value` is converted with `str()`. Raises |ValueError| when the
        resulting string is longer than 255 characters.
        """
        if not isinstance(value, str):
            value = str(value)
        if len(value) > 255:
            tmpl = "exceeded 255 char limit for property, got:\n\n'%s'"
            raise ValueError(tmpl % value)
        element = self._get_or_add(prop_name)
        element.text = value

    def _text_of_element(self, property_name: str) -> str:
        """The text in the element matching `property_name`.

        The empty string if the element is not present or contains no text.
        """
        element = getattr(self, property_name)
        if element is None:
            return ""
        if element.text is None:
            return ""
        return element.text

View File

@@ -0,0 +1,88 @@
"""Custom element classes that correspond to the document part, e.g. <w:document>."""
from __future__ import annotations
from typing import TYPE_CHECKING, Callable, List
from docx.oxml.section import CT_SectPr
from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrMore, ZeroOrOne
if TYPE_CHECKING:
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
class CT_Document(BaseOxmlElement):
    """`w:document` element, the root element of a `document.xml` file."""

    body: CT_Body = ZeroOrOne("w:body")  # pyright: ignore[reportAssignmentType]

    @property
    def sectPr_lst(self) -> List[CT_SectPr]:
        """Every `w:sectPr` element directly reachable from this document element.

        A `w:sectPr` belonging to a paragraph that is wrapped in revision marks or some
        other intervening layer (perhaps `w:sdt` or customXml elements) is not included.

        The elements are in document order. The final one is always `w:body/w:sectPr`;
        all those before it are `w:p/w:pPr/w:sectPr`.
        """
        return self.xpath("./w:body/w:p/w:pPr/w:sectPr | ./w:body/w:sectPr")
class CT_Body(BaseOxmlElement):
    """`w:body`, the container element for the main document story in `document.xml`."""

    # -- type-declarations only; none of these is defined in this class body --
    add_p: Callable[[], CT_P]
    get_or_add_sectPr: Callable[[], CT_SectPr]
    p_lst: List[CT_P]
    tbl_lst: List[CT_Tbl]
    _insert_tbl: Callable[[CT_Tbl], CT_Tbl]

    p = ZeroOrMore("w:p", successors=("w:sectPr",))
    tbl = ZeroOrMore("w:tbl", successors=("w:sectPr",))
    sectPr: CT_SectPr | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:sectPr", successors=()
    )

    def add_section_break(self) -> CT_SectPr:
        """Add a section at the end of the document and return its `w:sectPr`.

        What was the last `w:sectPr` becomes the second-to-last: an exact clone of it is
        placed in a new `w:p` appended to the document. The new last section starts as a
        copy of the previous one except that all header and footer references are removed
        (and are therefore "inherited" from the prior section).

        The element returned is the document's sentinel `w:sectPr`; as implemented it
        `is` the prior sentinel `w:sectPr` with its headers and footers removed.
        """
        # ---the sectPr at file-end controls the last section (sections[-1])---
        trailing_sectPr = self.get_or_add_sectPr()

        # ---an exact copy goes in a new `w:p`; that is now the second-to-last section---
        carrier_p = self.add_p()
        carrier_p.set_sectPr(trailing_sectPr.clone())

        # ---strip header and footer references from the "new" last section---
        for reference in trailing_sectPr.xpath("w:headerReference|w:footerReference"):
            trailing_sectPr.remove(reference)

        # ---the sentinel `w:sectPr` now controls the new last section---
        return trailing_sectPr

    def clear_content(self):
        """Remove every child of this `w:body` element other than `w:sectPr`.

        A `w:sectPr` child, when present, is left in place.
        """
        removable_children = self.xpath("./*[not(self::w:sectPr)]")
        for child in removable_children:
            self.remove(child)

    @property
    def inner_content_elements(self) -> List[CT_P | CT_Tbl]:
        """All `w:p` and `w:tbl` elements that are direct children of this body.

        Elements appear in document order. An element shaded by nesting in a `w:ins` or
        other "wrapper" element is not included.
        """
        return self.xpath("./w:p | ./w:tbl")

View File

@@ -0,0 +1,11 @@
"""Custom element-classes for DrawingML-related elements like `<w:drawing>`.
For legacy reasons, many DrawingML-related elements are in `docx.oxml.shape`. Expect
those to move over here as we have reason to touch them.
"""
from docx.oxml.xmlchemy import BaseOxmlElement
class CT_Drawing(BaseOxmlElement):
    """`<w:drawing>` element, containing a DrawingML object like a picture or chart.

    Defines no members of its own; everything it offers comes from `BaseOxmlElement`.
    """

View File

@@ -0,0 +1,10 @@
"""Exceptions for oxml sub-package."""
class XmlchemyError(Exception):
    """Generic error class; the base class of `InvalidXmlError`."""
class InvalidXmlError(XmlchemyError):
    """Raised when invalid XML is encountered.

    An example is an attempt to access a missing required child element.
    """

View File

@@ -0,0 +1,109 @@
"""Namespace-related objects."""
from __future__ import annotations
from typing import Dict
# -- namespace prefix -> namespace URI --
nsmap = {
    "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
    "c": "http://schemas.openxmlformats.org/drawingml/2006/chart",
    "cp": "http://schemas.openxmlformats.org/package/2006/metadata/core-properties",
    "dc": "http://purl.org/dc/elements/1.1/",
    "dcmitype": "http://purl.org/dc/dcmitype/",
    "dcterms": "http://purl.org/dc/terms/",
    "dgm": "http://schemas.openxmlformats.org/drawingml/2006/diagram",
    "m": "http://schemas.openxmlformats.org/officeDocument/2006/math",
    "pic": "http://schemas.openxmlformats.org/drawingml/2006/picture",
    "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
    "sl": "http://schemas.openxmlformats.org/schemaLibrary/2006/main",
    "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
    "w14": "http://schemas.microsoft.com/office/word/2010/wordml",
    "wp": "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
    "xml": "http://www.w3.org/XML/1998/namespace",
    "xsi": "http://www.w3.org/2001/XMLSchema-instance",
}

# -- reverse lookup: namespace URI -> namespace prefix --
pfxmap = {uri: prefix for prefix, uri in nsmap.items()}
class NamespacePrefixedTag(str):
    """Value object that knows the semantics of an XML tag having a namespace prefix.

    The string value is the prefixed tag itself, e.g. "w:p". The prefix must be a key
    of the module-level `nsmap`.
    """

    def __new__(cls, nstag: str):
        return super().__new__(cls, nstag)

    def __init__(self, nstag: str):
        # -- exactly one ":" is required; anything else fails the unpacking below --
        prefix, local_part = nstag.split(":")
        self._pfx = prefix
        self._local_part = local_part
        self._ns_uri = nsmap[prefix]

    @property
    def clark_name(self) -> str:
        """This tag in Clark notation, like "{http://foo/bar}foobar"."""
        return "{%s}%s" % (self._ns_uri, self._local_part)

    @classmethod
    def from_clark_name(cls, clark_name: str) -> NamespacePrefixedTag:
        """Return a new instance for the tag given in Clark notation by `clark_name`."""
        # -- drop the leading "{" then separate the URI from the local name at "}" --
        uri_and_local_name = clark_name[1:].split("}")
        nsuri, local_name = uri_and_local_name
        return cls("%s:%s" % (pfxmap[nsuri], local_name))

    @property
    def local_part(self) -> str:
        """The local part of this tag.

        E.g. "foobar" is returned for tag "f:foobar".
        """
        return self._local_part

    @property
    def nsmap(self) -> Dict[str, str]:
        """Single-member dict mapping prefix of this tag to its namespace name.

        Example: `{"f": "http://foo/bar"}`. This is handy for passing to xpath calls
        and other uses.
        """
        return {self._pfx: self._ns_uri}

    @property
    def nspfx(self) -> str:
        """The namespace-prefix for this tag.

        For example, "f" is returned for tag "f:foobar".
        """
        return self._pfx

    @property
    def nsuri(self) -> str:
        """The namespace URI for this tag.

        For example, "http://foo/bar" would be returned for tag "f:foobar" if the "f"
        prefix maps to "http://foo/bar" in nsmap.
        """
        return self._ns_uri
def nsdecls(*prefixes: str) -> str:
    """Return namespace declarations for each namespace-prefix in `prefixes`.

    The result is one `xmlns:pfx="uri"` item per prefix, separated by single spaces and
    in the order given. Handy for adding required namespace declarations to a tree root
    element.
    """
    declarations = ('xmlns:%s="%s"' % (prefix, nsmap[prefix]) for prefix in prefixes)
    return " ".join(declarations)
def nspfxmap(*nspfxs: str) -> Dict[str, str]:
    """Return the subset of `nsmap` for the namespace prefixes in `nspfxs`.

    Any number of namespace prefixes can be supplied, e.g. `nspfxmap("a", "r", "w")`.
    """
    subset: Dict[str, str] = {}
    for prefix in nspfxs:
        subset[prefix] = nsmap[prefix]
    return subset
def qn(tag: str) -> str:
    """Stands for "qualified name".

    Converts a familiar namespace-prefixed tag name like "w:p" into the Clark-notation
    qualified tag name lxml uses. For example, `qn("w:p")` returns
    "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}p".
    """
    prefix, local_name = tag.split(":")
    return "{%s}%s" % (nsmap[prefix], local_name)

View File

@@ -0,0 +1,109 @@
"""Custom element classes related to the numbering part."""
from docx.oxml.parser import OxmlElement
from docx.oxml.shared import CT_DecimalNumber
from docx.oxml.simpletypes import ST_DecimalNumber
from docx.oxml.xmlchemy import (
BaseOxmlElement,
OneAndOnlyOne,
RequiredAttribute,
ZeroOrMore,
ZeroOrOne,
)
class CT_Num(BaseOxmlElement):
    """``<w:num>`` element, a concrete list definition instance.

    It has a required ``<w:abstractNumId>`` child that references the abstract numbering
    definition in which most of the formatting details are defined.
    """

    abstractNumId = OneAndOnlyOne("w:abstractNumId")
    lvlOverride = ZeroOrMore("w:lvlOverride")
    numId = RequiredAttribute("w:numId", ST_DecimalNumber)

    def add_lvlOverride(self, ilvl):
        """Return a newly added CT_NumLvl (``<w:lvlOverride>``) element having its
        ``ilvl`` attribute set to `ilvl`."""
        return self._add_lvlOverride(ilvl=ilvl)

    @classmethod
    def new(cls, num_id, abstractNum_id):
        """Return a new ``<w:num>`` element with numId `num_id`.

        The element gets a single ``<w:abstractNumId>`` child whose val attribute is set
        to `abstractNum_id`.
        """
        num = OxmlElement("w:num")
        num.numId = num_id
        num.append(CT_DecimalNumber.new("w:abstractNumId", abstractNum_id))
        return num
class CT_NumLvl(BaseOxmlElement):
    """``<w:lvlOverride>`` element, which identifies a level in a list definition to
    override with settings it contains."""

    # -- optional child; declared to be followed, when present, by `w:lvl` --
    startOverride = ZeroOrOne("w:startOverride", successors=("w:lvl",))
    # -- required `w:ilvl` attribute --
    ilvl = RequiredAttribute("w:ilvl", ST_DecimalNumber)

    def add_startOverride(self, val):
        """Return a newly added CT_DecimalNumber element having tagname
        ``w:startOverride`` and ``val`` attribute set to `val`."""
        # -- `_add_startOverride` is not defined in this class body; presumably it is
        # -- supplied for the `startOverride` declaration above - TODO confirm --
        return self._add_startOverride(val=val)
class CT_NumPr(BaseOxmlElement):
    """A ``<w:numPr>`` element, a container for numbering properties applied to a
    paragraph.

    Declares two optional children, ``<w:ilvl>`` and ``<w:numId>``.
    """

    ilvl = ZeroOrOne("w:ilvl", successors=("w:numId", "w:numberingChange", "w:ins"))
    numId = ZeroOrOne("w:numId", successors=("w:numberingChange", "w:ins"))

    # -- NOTE(review): the two setters below are disabled (commented out) and have no
    # -- effect at runtime; they are kept here as found. --
    # @ilvl.setter
    # def _set_ilvl(self, val):
    #     """
    #     Get or add a <w:ilvl> child and set its ``w:val`` attribute to `val`.
    #     """
    #     ilvl = self.get_or_add_ilvl()
    #     ilvl.val = val
    # @numId.setter
    # def numId(self, val):
    #     """
    #     Get or add a <w:numId> child and set its ``w:val`` attribute to
    #     `val`.
    #     """
    #     numId = self.get_or_add_numId()
    #     numId.val = val
class CT_Numbering(BaseOxmlElement):
    """``<w:numbering>`` element, the root element of a numbering part, i.e.
    numbering.xml."""

    num = ZeroOrMore("w:num", successors=("w:numIdMacAtCleanup",))

    def add_num(self, abstractNum_id):
        """Return a newly added CT_Num (<w:num>) element referencing the abstract
        numbering definition identified by `abstractNum_id`.

        The new element is given the first unused numId, see `_next_numId`.
        """
        next_num_id = self._next_numId
        num = CT_Num.new(next_num_id, abstractNum_id)
        return self._insert_num(num)

    def num_having_numId(self, numId):
        """Return the ``<w:num>`` child element having ``numId`` attribute matching
        `numId`.

        Raises |KeyError| when no such child is present.
        """
        xpath = './w:num[@w:numId="%d"]' % numId
        try:
            return self.xpath(xpath)[0]
        except IndexError:
            # -- the IndexError is an implementation detail of the lookup; suppress it
            # -- so the traceback shows only the KeyError the caller should handle --
            raise KeyError("no <w:num> element with numId %d" % numId) from None

    @property
    def _next_numId(self):
        """The first ``numId`` unused by a ``<w:num>`` element, starting at 1 and
        filling any gaps in numbering between existing ``<w:num>`` elements."""
        # -- a set makes each membership test O(1) rather than a scan of a list --
        used_num_ids = {int(numId_str) for numId_str in self.xpath("./w:num/@w:numId")}
        candidate = 1
        while candidate in used_num_ids:
            candidate += 1
        return candidate

View File

@@ -0,0 +1,62 @@
# pyright: reportImportCycles=false
"""XML parser for python-docx."""
from __future__ import annotations
from typing import TYPE_CHECKING, Dict, Type, cast
from lxml import etree
from docx.oxml.ns import NamespacePrefixedTag, nsmap
if TYPE_CHECKING:
from docx.oxml.xmlchemy import BaseOxmlElement
# -- configure XML parser --
# -- namespace-keyed class lookup; `register_element_cls()` adds entries to it --
element_class_lookup = etree.ElementNamespaceClassLookup()
# -- parser is created with `remove_blank_text=True` and `resolve_entities=False` --
oxml_parser = etree.XMLParser(remove_blank_text=True, resolve_entities=False)
# -- elements created by `oxml_parser` get their class from `element_class_lookup` --
oxml_parser.set_element_class_lookup(element_class_lookup)
def parse_xml(xml: str | bytes) -> "BaseOxmlElement":
    """Parse `xml` and return the root lxml element of the result.

    Parsing is performed by the module-level `oxml_parser`, so elements in `xml` that
    have a registered custom element class are produced as instances of that class.
    """
    root = etree.fromstring(xml, oxml_parser)
    # -- the cast only informs the type-checker; it does nothing at runtime --
    return cast("BaseOxmlElement", root)
def register_element_cls(tag: str, cls: Type["BaseOxmlElement"]):
    """Make `cls` the custom element class the oxml parser uses for `tag`.

    `tag` is a namespace-prefixed tag name of the form `nspfx:tagroot`, e.g.
    `'w:document'`. The prefix is looked up in `nsmap` to find the namespace under which
    `cls` is registered; the part after the colon is the key within that namespace.
    """
    # -- unpacking raises ValueError unless `tag` contains exactly one colon --
    prefix, local_name = tag.split(":")
    element_class_lookup.get_namespace(nsmap[prefix])[local_name] = cls
def OxmlElement(
    nsptag_str: str,
    attrs: Dict[str, str] | None = None,
    nsdecls: Dict[str, str] | None = None,
) -> BaseOxmlElement | etree._Element:  # pyright: ignore[reportPrivateUsage]
    """Create and return a "loose" (parentless) element for the tag `nsptag_str`.

    `nsptag_str` must include the standard namespace prefix, e.g. `a:tbl`. The element
    is made by `oxml_parser`, so it is an instance of the custom element class for that
    tag when one is defined.

    `attrs`, when provided, is a dict of attribute values passed through as the
    element's attributes. `nsdecls`, when provided, maps prefix to namespace name and
    determines the namespaces declared on the element. When `nsdecls` is omitted, the
    namespace map derived from the prefix of `nsptag_str` is used instead.
    """
    nsptag = NamespacePrefixedTag(nsptag_str)
    element_nsmap = nsptag.nsmap if nsdecls is None else nsdecls
    return oxml_parser.makeelement(nsptag.clark_name, attrib=attrs, nsmap=element_nsmap)

View File

@@ -0,0 +1,537 @@
"""Section-related custom element classes."""
from __future__ import annotations
from copy import deepcopy
from typing import Callable, Iterator, List, Sequence, cast
from lxml import etree
from typing_extensions import TypeAlias
from docx.enum.section import WD_HEADER_FOOTER, WD_ORIENTATION, WD_SECTION_START
from docx.oxml.ns import nsmap
from docx.oxml.shared import CT_OnOff
from docx.oxml.simpletypes import ST_SignedTwipsMeasure, ST_TwipsMeasure, XsdString
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.oxml.xmlchemy import (
BaseOxmlElement,
OptionalAttribute,
RequiredAttribute,
ZeroOrMore,
ZeroOrOne,
)
from docx.shared import Length, lazyproperty
# -- type alias for a "block" element: a paragraph (`CT_P`) or a table (`CT_Tbl`) --
BlockElement: TypeAlias = "CT_P | CT_Tbl"
class CT_HdrFtr(BaseOxmlElement):
    """`w:hdr` and `w:ftr`, the root element for header and footer part respectively."""

    # -- annotations only, no values are assigned here; presumably these members are
    # -- supplied by the `ZeroOrMore` declarations below (confirm in xmlchemy) --
    add_p: Callable[[], CT_P]
    p_lst: List[CT_P]
    tbl_lst: List[CT_Tbl]
    _insert_tbl: Callable[[CT_Tbl], CT_Tbl]

    p = ZeroOrMore("w:p", successors=())
    tbl = ZeroOrMore("w:tbl", successors=())

    @property
    def inner_content_elements(self) -> List[CT_P | CT_Tbl]:
        """All `w:p` and `w:tbl` child elements of this header or footer, as a list.

        Elements appear in document order. Only direct children are selected, so
        elements shaded by nesting in a `w:ins` or other "wrapper" element will not be
        included.
        """
        return self.xpath("./w:p | ./w:tbl")
class CT_HdrFtrRef(BaseOxmlElement):
    """`w:headerReference` and `w:footerReference` elements."""

    # -- required `w:type` attribute, a member of `WD_HEADER_FOOTER` --
    type_: WD_HEADER_FOOTER = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
        "w:type", WD_HEADER_FOOTER
    )
    # -- required `r:id` attribute, a string (relationship id) --
    rId: str = RequiredAttribute("r:id", XsdString)  # pyright: ignore[reportAssignmentType]
class CT_PageMar(BaseOxmlElement):
    """``<w:pgMar>`` element, defining page margins.

    Every attribute is optional. `w:top` and `w:bottom` are converted with
    `ST_SignedTwipsMeasure`; all the others use `ST_TwipsMeasure`.
    """

    top: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:top", ST_SignedTwipsMeasure
    )
    right: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:right", ST_TwipsMeasure
    )
    bottom: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:bottom", ST_SignedTwipsMeasure
    )
    left: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:left", ST_TwipsMeasure
    )
    header: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:header", ST_TwipsMeasure
    )
    footer: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:footer", ST_TwipsMeasure
    )
    gutter: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:gutter", ST_TwipsMeasure
    )
class CT_PageSz(BaseOxmlElement):
    """``<w:pgSz>`` element, defining page dimensions and orientation."""

    # -- optional `w:w` attribute (page width) --
    w: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:w", ST_TwipsMeasure
    )
    # -- optional `w:h` attribute (page height) --
    h: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:h", ST_TwipsMeasure
    )
    # -- optional `w:orient` attribute, declared with a default of PORTRAIT --
    orient: WD_ORIENTATION = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:orient", WD_ORIENTATION, default=WD_ORIENTATION.PORTRAIT
    )
class CT_SectPr(BaseOxmlElement):
    """`w:sectPr` element, the container element for section properties."""

    # -- annotations only, no values are assigned here; presumably these members are
    # -- supplied by the child-element declarations below (confirm in xmlchemy) --
    get_or_add_pgMar: Callable[[], CT_PageMar]
    get_or_add_pgSz: Callable[[], CT_PageSz]
    get_or_add_titlePg: Callable[[], CT_OnOff]
    get_or_add_type: Callable[[], CT_SectType]
    _add_footerReference: Callable[[], CT_HdrFtrRef]
    _add_headerReference: Callable[[], CT_HdrFtrRef]
    _remove_titlePg: Callable[[], None]
    _remove_type: Callable[[], None]

    # -- child tag names in sequence; each declaration below passes the slice of tags
    # -- that follow its own tag as `successors` --
    _tag_seq = (
        "w:footnotePr",
        "w:endnotePr",
        "w:type",
        "w:pgSz",
        "w:pgMar",
        "w:paperSrc",
        "w:pgBorders",
        "w:lnNumType",
        "w:pgNumType",
        "w:cols",
        "w:formProt",
        "w:vAlign",
        "w:noEndnote",
        "w:titlePg",
        "w:textDirection",
        "w:bidi",
        "w:rtlGutter",
        "w:docGrid",
        "w:printerSettings",
        "w:sectPrChange",
    )
    headerReference = ZeroOrMore("w:headerReference", successors=_tag_seq)
    footerReference = ZeroOrMore("w:footerReference", successors=_tag_seq)
    type: CT_SectType | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:type", successors=_tag_seq[3:]
    )
    pgSz: CT_PageSz | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:pgSz", successors=_tag_seq[4:]
    )
    pgMar: CT_PageMar | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:pgMar", successors=_tag_seq[5:]
    )
    titlePg: CT_OnOff | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:titlePg", successors=_tag_seq[14:]
    )
    # -- only needed while the class body executes; don't leave it as a class attribute --
    del _tag_seq

    def add_footerReference(self, type_: WD_HEADER_FOOTER, rId: str) -> CT_HdrFtrRef:
        """Return newly added CT_HdrFtrRef element of `type_` with `rId`.

        The element tag is `w:footerReference`.
        """
        footerReference = self._add_footerReference()
        footerReference.type_ = type_
        footerReference.rId = rId
        return footerReference

    def add_headerReference(self, type_: WD_HEADER_FOOTER, rId: str) -> CT_HdrFtrRef:
        """Return newly added CT_HdrFtrRef element of `type_` with `rId`.

        The element tag is `w:headerReference`.
        """
        headerReference = self._add_headerReference()
        headerReference.type_ = type_
        headerReference.rId = rId
        return headerReference

    @property
    def bottom_margin(self) -> Length | None:
        """Value of the `w:bottom` attr of `<w:pgMar>` child element, as |Length|.

        |None| when either the element or the attribute is not present.
        """
        pgMar = self.pgMar
        if pgMar is None:
            return None
        return pgMar.bottom

    @bottom_margin.setter
    def bottom_margin(self, value: int | Length | None):
        pgMar = self.get_or_add_pgMar()
        pgMar.bottom = value if value is None or isinstance(value, Length) else Length(value)

    def clone(self) -> CT_SectPr:
        """Return a deep copy of this ``<w:sectPr>`` element tree suitable for use in
        adding a section break.

        All attributes of the root ``<w:sectPr>`` element (e.g. rsid* attributes) are
        removed from the copy; its child elements are copied unchanged.
        """
        cloned_sectPr = deepcopy(self)
        cloned_sectPr.attrib.clear()
        return cloned_sectPr

    @property
    def footer(self) -> Length | None:
        """Distance from bottom edge of page to bottom edge of the footer.

        This is the value of the `w:footer` attribute in the `w:pgMar` child element,
        as a |Length| object, or |None| if either the element or the attribute is not
        present.
        """
        pgMar = self.pgMar
        if pgMar is None:
            return None
        return pgMar.footer

    @footer.setter
    def footer(self, value: int | Length | None):
        pgMar = self.get_or_add_pgMar()
        pgMar.footer = value if value is None or isinstance(value, Length) else Length(value)

    def get_footerReference(self, type_: WD_HEADER_FOOTER) -> CT_HdrFtrRef | None:
        """Return footerReference element of `type_` or None if not present."""
        footerReferences = self.xpath(
            "./w:footerReference[@w:type='%s']" % WD_HEADER_FOOTER.to_xml(type_)
        )
        return footerReferences[0] if footerReferences else None

    def get_headerReference(self, type_: WD_HEADER_FOOTER) -> CT_HdrFtrRef | None:
        """Return headerReference element of `type_` or None if not present."""
        headerReferences = self.xpath(
            "./w:headerReference[@w:type='%s']" % WD_HEADER_FOOTER.to_xml(type_)
        )
        return headerReferences[0] if headerReferences else None

    @property
    def gutter(self) -> Length | None:
        """The value of the ``w:gutter`` attribute in the ``<w:pgMar>`` child element,
        as a |Length| object, or |None| if either the element or the attribute is not
        present."""
        pgMar = self.pgMar
        if pgMar is None:
            return None
        return pgMar.gutter

    @gutter.setter
    def gutter(self, value: int | Length | None):
        pgMar = self.get_or_add_pgMar()
        pgMar.gutter = value if value is None or isinstance(value, Length) else Length(value)

    @property
    def header(self) -> Length | None:
        """Distance from top edge of page to top edge of header.

        This value comes from the `w:header` attribute on the `w:pgMar` child element.
        |None| if either the element or the attribute is not present.
        """
        pgMar = self.pgMar
        if pgMar is None:
            return None
        return pgMar.header

    @header.setter
    def header(self, value: int | Length | None):
        pgMar = self.get_or_add_pgMar()
        pgMar.header = value if value is None or isinstance(value, Length) else Length(value)

    def iter_inner_content(self) -> Iterator[CT_P | CT_Tbl]:
        """Generate all `w:p` and `w:tbl` elements in this section.

        Elements appear in document order. Elements shaded by nesting in a `w:ins` or
        other "wrapper" element will not be included.
        """
        return _SectBlockElementIterator.iter_sect_block_elements(self)

    @property
    def left_margin(self) -> Length | None:
        """The value of the ``w:left`` attribute in the ``<w:pgMar>`` child element, as
        a |Length| object, or |None| if either the element or the attribute is not
        present."""
        pgMar = self.pgMar
        if pgMar is None:
            return None
        return pgMar.left

    @left_margin.setter
    def left_margin(self, value: int | Length | None):
        pgMar = self.get_or_add_pgMar()
        pgMar.left = value if value is None or isinstance(value, Length) else Length(value)

    @property
    def orientation(self) -> WD_ORIENTATION:
        """`WD_ORIENTATION` member indicating page-orientation for this section.

        This is the value of the `orient` attribute on the `w:pgSz` child, or
        `WD_ORIENTATION.PORTRAIT` if not present.
        """
        pgSz = self.pgSz
        if pgSz is None:
            return WD_ORIENTATION.PORTRAIT
        return pgSz.orient

    @orientation.setter
    def orientation(self, value: WD_ORIENTATION | None):
        pgSz = self.get_or_add_pgSz()
        # -- any falsy value, including |None|, is stored as PORTRAIT --
        pgSz.orient = value if value else WD_ORIENTATION.PORTRAIT

    @property
    def page_height(self) -> Length | None:
        """Value in EMU of the `h` attribute of the `w:pgSz` child element.

        |None| if not present.
        """
        pgSz = self.pgSz
        if pgSz is None:
            return None
        return pgSz.h

    @page_height.setter
    def page_height(self, value: int | Length | None):
        pgSz = self.get_or_add_pgSz()
        # -- coerce a bare int to |Length|, consistent with the margin setters --
        pgSz.h = value if value is None or isinstance(value, Length) else Length(value)

    @property
    def page_width(self) -> Length | None:
        """Value in EMU of the ``w`` attribute of the ``<w:pgSz>`` child element.

        |None| if not present.
        """
        pgSz = self.pgSz
        if pgSz is None:
            return None
        return pgSz.w

    @page_width.setter
    def page_width(self, value: int | Length | None):
        pgSz = self.get_or_add_pgSz()
        # -- coerce a bare int to |Length|, consistent with the margin setters --
        pgSz.w = value if value is None or isinstance(value, Length) else Length(value)

    @property
    def preceding_sectPr(self) -> CT_SectPr | None:
        """SectPr immediately preceding this one or None if this is the first."""
        # -- [1] predicate returns list of zero or one value --
        preceding_sectPrs = self.xpath("./preceding::w:sectPr[1]")
        return preceding_sectPrs[0] if len(preceding_sectPrs) > 0 else None

    def remove_footerReference(self, type_: WD_HEADER_FOOTER) -> str:
        """Return rId of w:footerReference child of `type_` after removing it.

        Raises |ValueError| when this element has no footer reference of `type_`.
        """
        footerReference = self.get_footerReference(type_)
        if footerReference is None:
            # -- should never happen, but to satisfy type-check and just in case --
            raise ValueError("CT_SectPr has no footer reference")

        rId = footerReference.rId
        self.remove(footerReference)
        return rId

    def remove_headerReference(self, type_: WD_HEADER_FOOTER) -> str:
        """Return rId of w:headerReference child of `type_` after removing it.

        Raises |ValueError| when this element has no header reference of `type_`.
        """
        headerReference = self.get_headerReference(type_)
        if headerReference is None:
            # -- should never happen, but to satisfy type-check and just in case --
            raise ValueError("CT_SectPr has no header reference")

        rId = headerReference.rId
        self.remove(headerReference)
        return rId

    @property
    def right_margin(self) -> Length | None:
        """The value of the ``w:right`` attribute in the ``<w:pgMar>`` child element, as
        a |Length| object, or |None| if either the element or the attribute is not
        present."""
        pgMar = self.pgMar
        if pgMar is None:
            return None
        return pgMar.right

    @right_margin.setter
    def right_margin(self, value: int | Length | None):
        pgMar = self.get_or_add_pgMar()
        # -- coerce a bare int to |Length|, consistent with the other margin setters --
        pgMar.right = value if value is None or isinstance(value, Length) else Length(value)

    @property
    def start_type(self) -> WD_SECTION_START:
        """The member of the ``WD_SECTION_START`` enumeration corresponding to the value
        of the ``val`` attribute of the ``<w:type>`` child element, or
        ``WD_SECTION_START.NEW_PAGE`` if not present."""
        type_elm = self.type
        if type_elm is None or type_elm.val is None:
            return WD_SECTION_START.NEW_PAGE
        return type_elm.val

    @start_type.setter
    def start_type(self, value: WD_SECTION_START | None):
        # -- NEW_PAGE is what the getter reports when `w:type` is absent, so it is
        # -- represented by removing the element rather than writing it --
        if value is None or value is WD_SECTION_START.NEW_PAGE:
            self._remove_type()
            return
        type_elm = self.get_or_add_type()
        type_elm.val = value

    @property
    def titlePg_val(self) -> bool:
        """Value of `w:titlePg/@val` or |False| if `./w:titlePg` is not present."""
        titlePg = self.titlePg
        if titlePg is None:
            return False
        return titlePg.val

    @titlePg_val.setter
    def titlePg_val(self, value: bool | None):
        # -- |None| and |False| both remove the element; anything else stores True --
        if value in [None, False]:
            self._remove_titlePg()
        else:
            self.get_or_add_titlePg().val = True

    @property
    def top_margin(self) -> Length | None:
        """The value of the ``w:top`` attribute in the ``<w:pgMar>`` child element, as a
        |Length| object, or |None| if either the element or the attribute is not
        present."""
        pgMar = self.pgMar
        if pgMar is None:
            return None
        return pgMar.top

    @top_margin.setter
    def top_margin(self, value: int | Length | None):
        pgMar = self.get_or_add_pgMar()
        # -- coerce a bare int to |Length|, consistent with the other margin setters --
        pgMar.top = value if value is None or isinstance(value, Length) else Length(value)
class CT_SectType(BaseOxmlElement):
    """``<w:type>`` element (child of ``<w:sectPr>``), defining the section start type."""

    # -- optional `w:val` attribute, a member of `WD_SECTION_START` or None when absent --
    val: WD_SECTION_START | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:val", WD_SECTION_START
    )
# == HELPERS =========================================================================
class _SectBlockElementIterator:
    """Generates the block-item XML elements in a section.

    A block-item element is a `CT_P` (paragraph) or a `CT_Tbl` (table).
    """

    # -- Compiled XPath expressions are cached on the *class*. A new instance is created
    # -- for each iteration request, so caching on the instance would recompile the
    # -- expressions every time. --
    _compiled_blocks_xpath: etree.XPath | None = None
    _compiled_count_xpath: etree.XPath | None = None

    def __init__(self, sectPr: CT_SectPr):
        self._sectPr = sectPr

    @classmethod
    def iter_sect_block_elements(cls, sectPr: CT_SectPr) -> Iterator[BlockElement]:
        """Generate each CT_P or CT_Tbl element within extents governed by `sectPr`."""
        return cls(sectPr)._iter_sect_block_elements()

    def _iter_sect_block_elements(self) -> Iterator[BlockElement]:
        """Generate each CT_P or CT_Tbl element in section."""
        # -- General strategy is to get all block (<w:p> and <w:tbl>) elements from
        # -- start of doc to and including this section, then compute the count of those
        # -- elements that came from prior sections and skip that many to leave only the
        # -- ones in this section. It's possible to express this "between here and
        # -- there" (end of prior section and end of this one) concept in XPath, but it
        # -- would be harder to follow because there are special cases (e.g. no prior
        # -- section) and the boundary expressions are fairly hairy. I also believe it
        # -- would be computationally more expensive than doing it this straightforward
        # -- albeit (theoretically) slightly wasteful way.
        sectPr, sectPrs = self._sectPr, self._sectPrs
        sectPr_idx = sectPrs.index(sectPr)

        # -- count block items belonging to prior sections --
        n_blks_to_skip = (
            0
            if sectPr_idx == 0
            else self._count_of_blocks_in_and_above_section(sectPrs[sectPr_idx - 1])
        )

        # -- and skip those in set of all blks from doc start to end of this section --
        yield from self._blocks_in_and_above_section(sectPr)[n_blks_to_skip:]

    def _blocks_in_and_above_section(self, sectPr: CT_SectPr) -> Sequence[BlockElement]:
        """All ps and tbls in section defined by `sectPr` and all prior sections."""
        cls = type(self)
        # -- compile on first use and store on the class so later instances reuse it --
        if cls._compiled_blocks_xpath is None:
            cls._compiled_blocks_xpath = etree.XPath(
                self._blocks_in_and_above_section_xpath,
                namespaces=nsmap,
                regexp=False,
            )
        xpath = cls._compiled_blocks_xpath
        # -- XPath callable results are Any (basically), so need a cast. --
        return cast(Sequence[BlockElement], xpath(sectPr))

    @lazyproperty
    def _blocks_in_and_above_section_xpath(self) -> str:
        """XPath expr for ps and tbls in context of a sectPr and all prior sectPrs."""
        # -- "p_sect" is a section with sectPr located at w:p/w:pPr/w:sectPr.
        # -- "body_sect" is a section with sectPr located at w:body/w:sectPr. The last
        # -- section in the document is a "body_sect". All others are of the "p_sect"
        # -- variety. "term" means "terminal", like the last p or tbl in the section.
        # -- "pred" means "predecessor", like a preceding p or tbl in the section.

        # -- the terminal block in a p-based sect is the p the sectPr appears in --
        p_sect_term_block = "./parent::w:pPr/parent::w:p"
        # -- the terminus of a body-based sect is the sectPr itself (not a block) --
        body_sect_term = "self::w:sectPr[parent::w:body]"
        # -- all the ps and tbls preceding (but not including) the context node --
        pred_ps_and_tbls = "preceding-sibling::*[self::w:p | self::w:tbl]"

        # -- p_sect_term_block and body_sect_term(inus) are mutually exclusive. So the
        # -- result is either the union of nodes found by the first two selectors or the
        # -- nodes found by the last selector, never both.
        return (
            # -- include the p containing a sectPr --
            f"{p_sect_term_block}"
            # -- along with all the blocks that precede it --
            f" | {p_sect_term_block}/{pred_ps_and_tbls}"
            # -- or all the preceding blocks if sectPr is body-based (last sectPr) --
            f" | {body_sect_term}/{pred_ps_and_tbls}"
        )

    def _count_of_blocks_in_and_above_section(self, sectPr: CT_SectPr) -> int:
        """Count of ps and tbls in section defined by `sectPr` and all prior sections."""
        cls = type(self)
        # -- compile on first use and store on the class so later instances reuse it --
        if cls._compiled_count_xpath is None:
            cls._compiled_count_xpath = etree.XPath(
                f"count({self._blocks_in_and_above_section_xpath})",
                namespaces=nsmap,
                regexp=False,
            )
        xpath = cls._compiled_count_xpath
        # -- numeric XPath results are always float, so need an int() conversion --
        return int(cast(float, xpath(sectPr)))

    @lazyproperty
    def _sectPrs(self) -> Sequence[CT_SectPr]:
        """All w:sectPr elements in document, in document-order."""
        return self._sectPr.xpath(
            "/w:document/w:body/w:p/w:pPr/w:sectPr | /w:document/w:body/w:sectPr",
        )

View File

@@ -0,0 +1,138 @@
"""Custom element classes related to document settings."""
from __future__ import annotations
from typing import TYPE_CHECKING, Callable
from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrOne
if TYPE_CHECKING:
from docx.oxml.shared import CT_OnOff
class CT_Settings(BaseOxmlElement):
    """`w:settings` element, root element for the settings part."""

    # -- annotations only, no values are assigned here; presumably these members are
    # -- supplied by the `ZeroOrOne` declaration below (confirm in xmlchemy) --
    get_or_add_evenAndOddHeaders: Callable[[], CT_OnOff]
    _remove_evenAndOddHeaders: Callable[[], None]

    # -- child tag names in sequence; the slice following a child's own tag is passed
    # -- as that child's `successors` --
    _tag_seq = (
        "w:writeProtection",
        "w:view",
        "w:zoom",
        "w:removePersonalInformation",
        "w:removeDateAndTime",
        "w:doNotDisplayPageBoundaries",
        "w:displayBackgroundShape",
        "w:printPostScriptOverText",
        "w:printFractionalCharacterWidth",
        "w:printFormsData",
        "w:embedTrueTypeFonts",
        "w:embedSystemFonts",
        "w:saveSubsetFonts",
        "w:saveFormsData",
        "w:mirrorMargins",
        "w:alignBordersAndEdges",
        "w:bordersDoNotSurroundHeader",
        "w:bordersDoNotSurroundFooter",
        "w:gutterAtTop",
        "w:hideSpellingErrors",
        "w:hideGrammaticalErrors",
        "w:activeWritingStyle",
        "w:proofState",
        "w:formsDesign",
        "w:attachedTemplate",
        "w:linkStyles",
        "w:stylePaneFormatFilter",
        "w:stylePaneSortMethod",
        "w:documentType",
        "w:mailMerge",
        "w:revisionView",
        "w:trackRevisions",
        "w:doNotTrackMoves",
        "w:doNotTrackFormatting",
        "w:documentProtection",
        "w:autoFormatOverride",
        "w:styleLockTheme",
        "w:styleLockQFSet",
        "w:defaultTabStop",
        "w:autoHyphenation",
        "w:consecutiveHyphenLimit",
        "w:hyphenationZone",
        "w:doNotHyphenateCaps",
        "w:showEnvelope",
        "w:summaryLength",
        "w:clickAndTypeStyle",
        "w:defaultTableStyle",
        "w:evenAndOddHeaders",
        "w:bookFoldRevPrinting",
        "w:bookFoldPrinting",
        "w:bookFoldPrintingSheets",
        "w:drawingGridHorizontalSpacing",
        "w:drawingGridVerticalSpacing",
        "w:displayHorizontalDrawingGridEvery",
        "w:displayVerticalDrawingGridEvery",
        "w:doNotUseMarginsForDrawingGridOrigin",
        "w:drawingGridHorizontalOrigin",
        "w:drawingGridVerticalOrigin",
        "w:doNotShadeFormData",
        "w:noPunctuationKerning",
        "w:characterSpacingControl",
        "w:printTwoOnOne",
        "w:strictFirstAndLastChars",
        "w:noLineBreaksAfter",
        "w:noLineBreaksBefore",
        "w:savePreviewPicture",
        "w:doNotValidateAgainstSchema",
        "w:saveInvalidXml",
        "w:ignoreMixedContent",
        "w:alwaysShowPlaceholderText",
        "w:doNotDemarcateInvalidXml",
        "w:saveXmlDataOnly",
        "w:useXSLTWhenSaving",
        "w:saveThroughXslt",
        "w:showXMLTags",
        "w:alwaysMergeEmptyNamespace",
        "w:updateFields",
        "w:hdrShapeDefaults",
        "w:footnotePr",
        "w:endnotePr",
        "w:compat",
        "w:docVars",
        "w:rsids",
        "m:mathPr",
        "w:attachedSchema",
        "w:themeFontLang",
        "w:clrSchemeMapping",
        "w:doNotIncludeSubdocsInStats",
        "w:doNotAutoCompressPictures",
        "w:forceUpgrade",
        "w:captions",
        "w:readModeInkLockDown",
        "w:smartTagType",
        "sl:schemaLibrary",
        "w:shapeDefaults",
        "w:doNotEmbedSmartTags",
        "w:decimalSymbol",
        "w:listSeparator",
    )
    # -- "w:evenAndOddHeaders" is item 47 of `_tag_seq`, so its successors start at 48 --
    evenAndOddHeaders: CT_OnOff | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:evenAndOddHeaders", successors=_tag_seq[48:]
    )
    # -- only needed while the class body executes; don't leave it as a class attribute --
    del _tag_seq

    @property
    def evenAndOddHeaders_val(self) -> bool:
        """Value of `w:evenAndOddHeaders/@w:val` or |False| if the element is not present."""
        evenAndOddHeaders = self.evenAndOddHeaders
        if evenAndOddHeaders is None:
            return False
        return evenAndOddHeaders.val

    @evenAndOddHeaders_val.setter
    def evenAndOddHeaders_val(self, value: bool | None):
        # -- |None| and |False| are represented by removing the element --
        if value is None or value is False:
            self._remove_evenAndOddHeaders()
            return

        self.get_or_add_evenAndOddHeaders().val = value

View File

@@ -0,0 +1,299 @@
"""Custom element classes for shape-related elements like `<w:inline>`."""
from __future__ import annotations
from typing import TYPE_CHECKING, cast
from docx.oxml.ns import nsdecls
from docx.oxml.parser import parse_xml
from docx.oxml.simpletypes import (
ST_Coordinate,
ST_DrawingElementId,
ST_PositiveCoordinate,
ST_RelationshipId,
XsdString,
XsdToken,
)
from docx.oxml.xmlchemy import (
BaseOxmlElement,
OneAndOnlyOne,
OptionalAttribute,
RequiredAttribute,
ZeroOrOne,
)
if TYPE_CHECKING:
from docx.shared import Length
class CT_Anchor(BaseOxmlElement):
    """`<wp:anchor>` element, container for a "floating" shape.

    Declares no attributes or child elements of its own.
    """
class CT_Blip(BaseOxmlElement):
    """``<a:blip>`` element, specifies image source and adjustments such as alpha and
    tint."""

    # -- optional `r:embed` attribute, a relationship id or None when absent --
    embed: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "r:embed", ST_RelationshipId
    )
    # -- optional `r:link` attribute, a relationship id or None when absent --
    link: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "r:link", ST_RelationshipId
    )
class CT_BlipFillProperties(BaseOxmlElement):
    """``<pic:blipFill>`` element, specifies picture properties."""

    # -- NOTE(review): declared `ZeroOrOne` but annotated as `CT_Blip`, not
    # -- `CT_Blip | None`; confirm whether callers can rely on the child being present --
    blip: CT_Blip = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "a:blip", successors=("a:srcRect", "a:tile", "a:stretch")
    )
class CT_GraphicalObject(BaseOxmlElement):
    """``<a:graphic>`` element, container for a DrawingML object."""

    # -- the single `a:graphicData` child, declared as exactly-one --
    graphicData: CT_GraphicalObjectData = OneAndOnlyOne(  # pyright: ignore[reportAssignmentType]
        "a:graphicData"
    )
class CT_GraphicalObjectData(BaseOxmlElement):
    """``<a:graphicData>`` element, container for the XML of a DrawingML object."""

    # -- NOTE(review): declared `ZeroOrOne` but annotated as `CT_Picture`, not
    # -- `CT_Picture | None`; confirm whether callers can rely on the child being present --
    pic: CT_Picture = ZeroOrOne("pic:pic")  # pyright: ignore[reportAssignmentType]
    # -- required `uri` attribute (no namespace prefix) --
    uri: str = RequiredAttribute("uri", XsdToken)  # pyright: ignore[reportAssignmentType]
class CT_Inline(BaseOxmlElement):
    """`<wp:inline>` element, container for an inline shape."""

    extent: CT_PositiveSize2D = OneAndOnlyOne("wp:extent")  # pyright: ignore[reportAssignmentType]
    docPr: CT_NonVisualDrawingProps = OneAndOnlyOne(  # pyright: ignore[reportAssignmentType]
        "wp:docPr"
    )
    graphic: CT_GraphicalObject = OneAndOnlyOne(  # pyright: ignore[reportAssignmentType]
        "a:graphic"
    )

    @classmethod
    def new(cls, cx: Length, cy: Length, shape_id: int, pic: CT_Picture) -> CT_Inline:
        """Return a new ``<wp:inline>`` element built from the template XML.

        `cx` and `cy` become the extent of the shape, `shape_id` becomes the `id` of its
        `wp:docPr` child and also appears in its name ("Picture {shape_id}"), and `pic`
        is inserted into the `a:graphicData` element.
        """
        inline = cast(CT_Inline, parse_xml(cls._inline_xml()))

        extent = inline.extent
        extent.cx, extent.cy = cx, cy

        docPr = inline.docPr
        docPr.id = shape_id
        docPr.name = "Picture %d" % shape_id

        graphicData = inline.graphic.graphicData
        graphicData.uri = "http://schemas.openxmlformats.org/drawingml/2006/picture"
        graphicData._insert_pic(pic)

        return inline

    @classmethod
    def new_pic_inline(
        cls, shape_id: int, rId: str, filename: str, cx: Length, cy: Length
    ) -> CT_Inline:
        """Return a new `wp:inline` element that contains a new `pic:pic` element.

        The `pic:pic` element is created from `filename`, `rId`, `cx` and `cy`; the same
        `cx` and `cy` are used for the extent of the inline shape.
        """
        # -- Word doesn't seem to use the pic id, but does not omit it; 0 is used here --
        pic = CT_Picture.new(0, filename, rId, cx, cy)
        return cls.new(cx, cy, shape_id, pic)

    @classmethod
    def _inline_xml(cls):
        """Template XML for a `wp:inline` element, with namespace declarations filled in.

        The attribute values in the template are placeholders that `new()` overwrites.
        """
        template = (
            "<wp:inline %s>\n"
            ' <wp:extent cx="914400" cy="914400"/>\n'
            ' <wp:docPr id="666" name="unnamed"/>\n'
            " <wp:cNvGraphicFramePr>\n"
            ' <a:graphicFrameLocks noChangeAspect="1"/>\n'
            " </wp:cNvGraphicFramePr>\n"
            " <a:graphic>\n"
            ' <a:graphicData uri="URI not set"/>\n'
            " </a:graphic>\n"
            "</wp:inline>"
        )
        return template % nsdecls("wp", "a", "pic", "r")
class CT_NonVisualDrawingProps(BaseOxmlElement):
    """Used for ``<wp:docPr>`` element, and perhaps others.

    Specifies the id and name of a DrawingML drawing.
    """

    # -- both attributes are required and have no namespace prefix --
    id = RequiredAttribute("id", ST_DrawingElementId)
    name = RequiredAttribute("name", XsdString)
class CT_NonVisualPictureProperties(BaseOxmlElement):
    """``<pic:cNvPicPr>`` element, specifies picture locking and resize behaviors.

    Declares no attributes or child elements of its own.
    """
class CT_Picture(BaseOxmlElement):
    """``<pic:pic>`` element, a DrawingML picture."""

    nvPicPr: CT_PictureNonVisual = OneAndOnlyOne(  # pyright: ignore[reportAssignmentType]
        "pic:nvPicPr"
    )
    blipFill: CT_BlipFillProperties = OneAndOnlyOne(  # pyright: ignore[reportAssignmentType]
        "pic:blipFill"
    )
    spPr: CT_ShapeProperties = OneAndOnlyOne("pic:spPr")  # pyright: ignore[reportAssignmentType]

    @classmethod
    def new(cls, pic_id: int, filename: str, rId: str, cx: Length, cy: Length) -> CT_Picture:
        """Return a new minimum viable `<pic:pic>` (picture) element.

        The element is parsed from the template XML, then `pic_id` and `filename` are
        written to its `pic:cNvPr` element, `rId` to the `embed` attribute of its blip,
        and `cx` and `cy` to its shape properties.
        """
        # -- the cast only informs the type-checker; it does nothing at runtime --
        pic = cast(CT_Picture, parse_xml(cls._pic_xml()))

        cNvPr = pic.nvPicPr.cNvPr
        cNvPr.id = pic_id
        cNvPr.name = filename

        pic.blipFill.blip.embed = rId

        spPr = pic.spPr
        spPr.cx, spPr.cy = cx, cy

        return pic

    @classmethod
    def _pic_xml(cls):
        """Template XML for a `pic:pic` element, with namespace declarations filled in.

        The attribute values in the template are placeholders that `new()` overwrites.
        """
        template = (
            "<pic:pic %s>\n"
            " <pic:nvPicPr>\n"
            ' <pic:cNvPr id="666" name="unnamed"/>\n'
            " <pic:cNvPicPr/>\n"
            " </pic:nvPicPr>\n"
            " <pic:blipFill>\n"
            " <a:blip/>\n"
            " <a:stretch>\n"
            " <a:fillRect/>\n"
            " </a:stretch>\n"
            " </pic:blipFill>\n"
            " <pic:spPr>\n"
            " <a:xfrm>\n"
            ' <a:off x="0" y="0"/>\n'
            ' <a:ext cx="914400" cy="914400"/>\n'
            " </a:xfrm>\n"
            ' <a:prstGeom prst="rect"/>\n'
            " </pic:spPr>\n"
            "</pic:pic>"
        )
        return template % nsdecls("pic", "a", "r")
class CT_PictureNonVisual(BaseOxmlElement):
    """``<pic:nvPicPr>`` element, non-visual picture properties."""

    # -- the single `pic:cNvPr` child, declared as exactly-one --
    cNvPr = OneAndOnlyOne("pic:cNvPr")
class CT_Point2D(BaseOxmlElement):
    """Used for ``<a:off>`` element, and perhaps others.

    Specifies an x, y coordinate (point).
    """

    # -- both attributes are required and are converted using `ST_Coordinate` --
    x = RequiredAttribute("x", ST_Coordinate)
    y = RequiredAttribute("y", ST_Coordinate)
class CT_PositiveSize2D(BaseOxmlElement):
    """Used for ``<wp:extent>`` element, and perhaps others later.

    Specifies the size of a DrawingML drawing.
    """

    # -- both attributes are required and are converted using `ST_PositiveCoordinate` --
    cx: Length = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
        "cx", ST_PositiveCoordinate
    )
    cy: Length = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
        "cy", ST_PositiveCoordinate
    )
class CT_PresetGeometry2D(BaseOxmlElement):
    """``<a:prstGeom>`` element, specifies a preset autoshape geometry, such as
    ``rect``.

    Declares no attributes or child elements of its own.
    """
class CT_RelativeRect(BaseOxmlElement):
    """``<a:fillRect>`` element, specifying picture should fill containing rectangle
    shape.

    Declares no attributes or child elements of its own.
    """
class CT_ShapeProperties(BaseOxmlElement):
    """``<pic:spPr>`` element, specifies size and shape of picture container."""

    xfrm = ZeroOrOne(
        "a:xfrm",
        successors=(
            "a:custGeom",
            "a:prstGeom",
            "a:ln",
            "a:effectLst",
            "a:effectDag",
            "a:scene3d",
            "a:sp3d",
            "a:extLst",
        ),
    )

    @property
    def cx(self):
        """Shape width, read from the `a:xfrm` child, or None if that child is absent."""
        transform = self.xfrm
        return None if transform is None else transform.cx

    @cx.setter
    def cx(self, value):
        # -- the `a:xfrm` child is added when not already present --
        self.get_or_add_xfrm().cx = value

    @property
    def cy(self):
        """Shape height, read from the `a:xfrm` child, or None if that child is absent."""
        transform = self.xfrm
        return None if transform is None else transform.cy

    @cy.setter
    def cy(self, value):
        # -- the `a:xfrm` child is added when not already present --
        self.get_or_add_xfrm().cy = value
class CT_StretchInfoProperties(BaseOxmlElement):
    """``<a:stretch>`` element, specifies how picture should fill its containing
    shape.

    Declares no attributes or child elements of its own.
    """
class CT_Transform2D(BaseOxmlElement):
    """``<a:xfrm>`` element, holding an optional offset (`a:off`) and an optional extent
    (`a:ext`) for the picture container."""

    off = ZeroOrOne("a:off", successors=("a:ext",))
    ext = ZeroOrOne("a:ext", successors=())

    @property
    def cx(self):
        """The `cx` value of the `a:ext` child, or None if that child is absent."""
        extent = self.ext
        return None if extent is None else extent.cx

    @cx.setter
    def cx(self, value):
        # -- the `a:ext` child is added when not already present --
        self.get_or_add_ext().cx = value

    @property
    def cy(self):
        """The `cy` value of the `a:ext` child, or None if that child is absent."""
        extent = self.ext
        return None if extent is None else extent.cy

    @cy.setter
    def cy(self, value):
        # -- the `a:ext` child is added when not already present --
        self.get_or_add_ext().cy = value

View File

@@ -0,0 +1,52 @@
"""Objects shared by modules in the docx.oxml subpackage."""
from __future__ import annotations
from typing import cast
from docx.oxml.ns import qn
from docx.oxml.parser import OxmlElement
from docx.oxml.simpletypes import ST_DecimalNumber, ST_OnOff, ST_String
from docx.oxml.xmlchemy import BaseOxmlElement, OptionalAttribute, RequiredAttribute
class CT_DecimalNumber(BaseOxmlElement):
    """Used for ``<w:numId>``, ``<w:ilvl>``, ``<w:abstractNumId>`` and several others,
    containing a text representation of a decimal number (e.g. 42) in its ``val``
    attribute."""

    val: int = RequiredAttribute("w:val", ST_DecimalNumber)  # pyright: ignore[reportAssignmentType]

    @classmethod
    def new(cls, nsptagname: str, val: int):
        """Return a new element with tagname `nsptagname` whose ``w:val`` attribute is
        the string form of `val`.

        The attribute is supplied directly at element creation as `str(val)`.
        """
        val_attr = {qn("w:val"): str(val)}
        return OxmlElement(nsptagname, attrs=val_attr)
class CT_OnOff(BaseOxmlElement):
    """Used for `w:b`, `w:i` elements and others.

    Contains a bool-ish string in its `val` attribute, xsd:boolean plus "on" and
    "off". Defaults to `True`, so `<w:b>` for example means "bold is turned on".
    """

    # -- optional `w:val` attribute, declared with a default of True --
    val: bool = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:val", ST_OnOff, default=True
    )
class CT_String(BaseOxmlElement):
    """Used for `w:pStyle` and `w:tblStyle` elements and others.

    In those cases, it contains a style name in its `val` attribute.
    """

    val: str = RequiredAttribute("w:val", ST_String)  # pyright: ignore[reportAssignmentType]

    @classmethod
    def new(cls, nsptagname: str, val: str):
        """A new `CT_String` element with tagname `nsptagname` and `val` attribute set to `val`."""
        # -- the cast only informs the type-checker; it does nothing at runtime --
        elm = cast(CT_String, OxmlElement(nsptagname))
        elm.val = val
        return elm

View File

@@ -0,0 +1,434 @@
# pyright: reportImportCycles=false
"""Simple-type classes, corresponding to ST_* schema items.
These provide validation and format translation for values stored in XML element
attributes. Naming generally corresponds to the simple type in the associated XML
schema.
"""
from __future__ import annotations
import datetime as dt
from typing import TYPE_CHECKING, Any, Tuple
from docx.exceptions import InvalidXmlError
from docx.shared import Emu, Pt, RGBColor, Twips
if TYPE_CHECKING:
from docx.shared import Length
class BaseSimpleType:
    """Base class for simple-types.

    `from_xml()` and `to_xml()` are the entry points; they delegate to the
    `convert_from_xml()`, `convert_to_xml()` and `validate()` hooks, which
    subclasses override. The `validate_*()` helpers are shared building blocks
    for those overrides.
    """

    @classmethod
    def from_xml(cls, xml_value: str) -> Any:
        """Return the Python value for attribute text `xml_value`."""
        return cls.convert_from_xml(xml_value)

    @classmethod
    def to_xml(cls, value: Any) -> str:
        """Validate `value`, then return its XML attribute text."""
        cls.validate(value)
        return cls.convert_to_xml(value)

    @classmethod
    def convert_from_xml(cls, str_value: str) -> Any:
        """Default conversion, parses `str_value` as an int."""
        return int(str_value)

    @classmethod
    def convert_to_xml(cls, value: Any) -> str:
        """Hook for subclasses; this base implementation does nothing."""
        ...

    @classmethod
    def validate(cls, value: Any) -> None:
        """Hook for subclasses; this base implementation accepts any value."""
        ...

    @classmethod
    def validate_int(cls, value: object):
        """Raise |TypeError| unless `value` is an instance of `int`."""
        if isinstance(value, int):
            return
        raise TypeError("value must be <type 'int'>, got %s" % type(value))

    @classmethod
    def validate_int_in_range(cls, value: int, min_inclusive: int, max_inclusive: int) -> None:
        """Raise unless `value` is an int between the two bounds, inclusive.

        Raises |TypeError| for a non-int and |ValueError| for an out-of-range int.
        """
        cls.validate_int(value)
        if min_inclusive <= value <= max_inclusive:
            return
        bounds_and_value = (min_inclusive, max_inclusive, value)
        raise ValueError("value must be in range %d to %d inclusive, got %d" % bounds_and_value)

    @classmethod
    def validate_string(cls, value: Any) -> str:
        """Return `value` unchanged when it is a `str`, raise |TypeError| otherwise."""
        if isinstance(value, str):
            return value
        raise TypeError("value must be a string, got %s" % type(value))
class BaseIntType(BaseSimpleType):
    """Base class for simple-types stored as integer text in XML."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> int:
        """Parse `str_value` with `int()`."""
        return int(str_value)

    @classmethod
    def convert_to_xml(cls, value: int) -> str:
        """Return `str(value)`."""
        return str(value)

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise |TypeError| unless `value` is an int; no range is enforced here."""
        cls.validate_int(value)
class BaseStringType(BaseSimpleType):
    """Base class for simple-types whose Python value is the XML text itself."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> str:
        """Return `str_value` unchanged."""
        return str_value

    @classmethod
    def convert_to_xml(cls, value: str) -> str:
        """Return `value` unchanged."""
        return value

    @classmethod
    def validate(cls, value: str):
        """Raise |TypeError| unless `value` is a `str`."""
        cls.validate_string(value)
class BaseStringEnumerationType(BaseStringType):
    """Base class for string simple-types limited to the values in `_members`."""

    # -- the permitted string values, provided by each subclass --
    _members: Tuple[str, ...]

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise |TypeError| for a non-string, |ValueError| for a non-member string."""
        cls.validate_string(value)
        if value in cls._members:
            return
        raise ValueError("must be one of %s, got '%s'" % (cls._members, value))
class XsdAnyUri(BaseStringType):
    """String simple-type for URI values; behaves exactly like `BaseStringType`.

    There's a regex in the spec this is supposed to meet, but current assessment is
    that spending cycles on validating wouldn't be worth it for the number of
    programming errors it would catch.
    """
class XsdBoolean(BaseSimpleType):
    """Boolean simple-type stored in XML as one of "1", "0", "true" or "false"."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> bool:
        """Return |True| for "1" or "true", |False| for "0" or "false".

        Raises |InvalidXmlError| for any other text.
        """
        if str_value not in ("1", "0", "true", "false"):
            raise InvalidXmlError(
                "value must be one of '1', '0', 'true' or 'false', got '%s'" % str_value
            )
        return str_value in ("1", "true")

    @classmethod
    def convert_to_xml(cls, value: bool) -> str:
        """Return "1" for a true value and "0" for a false one."""
        return {True: "1", False: "0"}[value]

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise |TypeError| unless `value` compares equal to |True| or |False|."""
        if value not in (True, False):
            # -- wrap in a 1-tuple so a tuple `value` is reported as-is rather than
            # -- being unpacked as multiple arguments to the `%` operator --
            raise TypeError(
                "only True or False (and possibly None) may be assigned, got '%s'" % (value,)
            )
class XsdId(BaseStringType):
    """String that must begin with a letter or underscore and cannot contain any colons.

    Not fully validated because not used in external API; this class adds no checks
    beyond those of `BaseStringType`.
    """
class XsdInt(BaseIntType):
    """Integer simple-type limited to the signed 32-bit range."""

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise unless `value` is an int in -2147483648..2147483647 inclusive."""
        lowest, highest = -(2**31), 2**31 - 1
        cls.validate_int_in_range(value, lowest, highest)
class XsdLong(BaseIntType):
    """Integer simple-type limited to the signed 64-bit range."""

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise unless `value` is an int in -2**63..2**63-1 inclusive."""
        lowest, highest = -(2**63), 2**63 - 1
        cls.validate_int_in_range(value, lowest, highest)
class XsdString(BaseStringType):
    """Plain string simple-type; adds nothing to `BaseStringType`."""
class XsdStringEnumeration(BaseStringEnumerationType):
    """Set of enumerated xsd:string values.

    Subclasses list the permitted values in `_members`.
    """
class XsdToken(BaseStringType):
    """Xsd:string with whitespace collapsing, e.g. multiple spaces reduced to one,
    leading and trailing space stripped.

    That describes the schema type; this class itself adds no behavior to
    `BaseStringType`.
    """
class XsdUnsignedInt(BaseIntType):
    """Integer simple-type limited to the unsigned 32-bit range."""

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise unless `value` is an int in 0..4294967295 inclusive."""
        highest = 2**32 - 1
        cls.validate_int_in_range(value, 0, highest)
class XsdUnsignedLong(BaseIntType):
    """Integer simple-type limited to the unsigned 64-bit range."""

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise unless `value` is an int in 0..2**64-1 inclusive."""
        highest = 2**64 - 1
        cls.validate_int_in_range(value, 0, highest)
class ST_BrClear(XsdString):
    """String simple-type accepting "none", "left", "right" or "all"."""

    @classmethod
    def validate(cls, value: str) -> None:
        """Raise |TypeError| for a non-string, |ValueError| for any other string."""
        cls.validate_string(value)
        allowed = ("none", "left", "right", "all")
        if value in allowed:
            return
        raise ValueError("must be one of %s, got '%s'" % (allowed, value))
class ST_BrType(XsdString):
    """String simple-type accepting "page", "column" or "textWrapping"."""

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise |TypeError| for a non-string, |ValueError| for any other string."""
        cls.validate_string(value)
        allowed = ("page", "column", "textWrapping")
        if value in allowed:
            return
        raise ValueError("must be one of %s, got '%s'" % (allowed, value))
class ST_Coordinate(BaseIntType):
    """Coordinate simple-type, read from XML as a length."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> Length:
        """Return the length for `str_value`.

        Text containing an "i", "m" or "p" is handed to `ST_UniversalMeasure`;
        anything else is parsed as an integer count of EMU.
        """
        has_unit_letter = any(letter in str_value for letter in ("i", "m", "p"))
        if has_unit_letter:
            return ST_UniversalMeasure.convert_from_xml(str_value)
        return Emu(int(str_value))

    @classmethod
    def validate(cls, value: Any) -> None:
        """Apply the same range check as `ST_CoordinateUnqualified`."""
        ST_CoordinateUnqualified.validate(value)
class ST_CoordinateUnqualified(XsdLong):
    """Integer coordinate simple-type with a narrower range than `XsdLong`."""

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise unless `value` is an int in -27273042329600..27273042316900 inclusive."""
        cls.validate_int_in_range(value, -27273042329600, 27273042316900)
class ST_DateTime(BaseSimpleType):
    """Simple-type for xsd:dateTime values, exchanged as `datetime.datetime` objects."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> dt.datetime:
        """Convert an xsd:dateTime string to a datetime object.

        Never raises; text that cannot be parsed produces 1970-01-01T00:00:00 UTC.
        """

        def parse_xsd_datetime(dt_str: str) -> dt.datetime:
            # -- handle trailing 'Z' (Zulu/UTC), common in Word files --
            if dt_str.endswith("Z"):
                try:
                    # -- optional fractional seconds case --
                    return dt.datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%fZ").replace(
                        tzinfo=dt.timezone.utc
                    )
                except ValueError:
                    return dt.datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%SZ").replace(
                        tzinfo=dt.timezone.utc
                    )

            # -- handles explicit offsets like +00:00, -05:00, or naive datetimes --
            try:
                return dt.datetime.fromisoformat(dt_str)
            except ValueError:
                # -- fall-back to parsing as naive datetime (with or without fractional seconds) --
                try:
                    return dt.datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S.%f")
                except ValueError:
                    return dt.datetime.strptime(dt_str, "%Y-%m-%dT%H:%M:%S")

        try:
            # -- parse anything reasonable, but never raise, just use default epoch time --
            return parse_xsd_datetime(str_value)
        except Exception:
            # -- deliberate catch-all: a malformed timestamp must not abort loading --
            return dt.datetime(1970, 1, 1, tzinfo=dt.timezone.utc)

    @classmethod
    def convert_to_xml(cls, value: dt.datetime) -> str:
        """Return `value` as UTC text of the form "YYYY-MM-DDTHH:MM:SSZ".

        A naive `value` is interpreted as local time. Sub-second precision is not
        written.
        """
        # -- convert naive datetime to timezone-aware assuming local timezone --
        if value.tzinfo is None:
            value = value.astimezone()

        # -- convert to UTC if not already --
        value = value.astimezone(dt.timezone.utc)

        # -- format with 'Z' suffix for UTC --
        return value.strftime("%Y-%m-%dT%H:%M:%SZ")

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise |TypeError| unless `value` is a `datetime.datetime` instance."""
        if not isinstance(value, dt.datetime):
            # -- wrap in a 1-tuple so a tuple `value` is reported as-is rather than
            # -- being unpacked as multiple arguments to the `%` operator --
            raise TypeError(
                "only a datetime.datetime object may be assigned, got '%s'" % (value,)
            )
class ST_DecimalNumber(XsdInt):
    """Decimal-number simple-type; behaves exactly like `XsdInt`."""
class ST_DrawingElementId(XsdUnsignedInt):
    """Drawing-element id simple-type; behaves exactly like `XsdUnsignedInt`."""
class ST_HexColor(BaseStringType):
    """Color simple-type, exchanged as an |RGBColor| or the string "auto" when read."""

    @classmethod
    def convert_from_xml(  # pyright: ignore[reportIncompatibleMethodOverride]
        cls, str_value: str
    ) -> RGBColor | str:
        """Return `ST_HexColorAuto.AUTO` for "auto", otherwise the |RGBColor| produced
        by `RGBColor.from_string()`."""
        is_auto = str_value == "auto"
        return ST_HexColorAuto.AUTO if is_auto else RGBColor.from_string(str_value)

    @classmethod
    def convert_to_xml(  # pyright: ignore[reportIncompatibleMethodOverride]
        cls, value: RGBColor
    ) -> str:
        """Keep alpha hex numerals all uppercase just for consistency."""
        # -- `value` supplies the three operands, each rendered as two hex digits --
        hex_template = "%02X%02X%02X"
        return hex_template % value

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise |ValueError| unless `value` is an |RGBColor| instance."""
        if isinstance(value, RGBColor):
            return
        raise ValueError(
            "rgb color value must be RGBColor object, got %s %s" % (type(value), value)
        )
class ST_HexColorAuto(XsdStringEnumeration):
    """Value for the `w:color[@val="auto"]` attribute setting."""

    AUTO = "auto"

    # -- "auto" is the only permitted value --
    _members = (AUTO,)
class ST_HpsMeasure(XsdUnsignedLong):
    """Half-point measure, e.g. 24.0 represents 12.0 points."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> Length:
        """Return the length for `str_value`.

        Text containing an "m", "n" or "p" is handed to `ST_UniversalMeasure`;
        anything else is parsed as an integer count of half-points.
        """
        has_unit_letter = any(letter in str_value for letter in ("m", "n", "p"))
        if has_unit_letter:
            return ST_UniversalMeasure.convert_from_xml(str_value)
        return Pt(int(str_value) / 2.0)

    @classmethod
    def convert_to_xml(cls, value: int | Length) -> str:
        """Return `value` as a whole number of half-points, truncated by `int()`."""
        points = Emu(value).pt
        return str(int(points * 2))
class ST_Merge(XsdStringEnumeration):
    """Valid values for <w:xMerge val=""> attribute."""

    CONTINUE = "continue"
    RESTART = "restart"

    # -- the two values accepted by the inherited `validate()` --
    _members = (CONTINUE, RESTART)
class ST_OnOff(XsdBoolean):
    """Boolean simple-type that also reads "on" and "off" in addition to the
    `XsdBoolean` values."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> bool:
        """Return |True| for "1", "true" or "on"; |False| for "0", "false" or "off".

        Raises |InvalidXmlError| for any other text.
        """
        truthy = ("1", "true", "on")
        falsy = ("0", "false", "off")
        if str_value in truthy:
            return True
        if str_value in falsy:
            return False
        message = "value must be one of '1', '0', 'true', 'false', 'on', or 'off', got '%s'"
        raise InvalidXmlError(message % str_value)
class ST_PositiveCoordinate(XsdLong):
    """Non-negative coordinate simple-type, read from XML as an |Emu| length."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> Length:
        """Parse `str_value` as an integer count of EMU."""
        return Emu(int(str_value))

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise unless `value` is an int in 0..27273042316900 inclusive."""
        cls.validate_int_in_range(value, 0, 27273042316900)
class ST_RelationshipId(XsdString):
    """Relationship-id simple-type; behaves exactly like `XsdString`."""
class ST_SignedTwipsMeasure(XsdInt):
    """Signed length simple-type stored in XML as twips."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> Length:
        """Return the length for `str_value`.

        Text containing an "i", "m" or "p" is handed to `ST_UniversalMeasure`;
        anything else is parsed as a float, rounded and taken as twips.
        """
        has_unit_letter = any(letter in str_value for letter in ("i", "m", "p"))
        if has_unit_letter:
            return ST_UniversalMeasure.convert_from_xml(str_value)
        whole_twips = int(round(float(str_value)))
        return Twips(whole_twips)

    @classmethod
    def convert_to_xml(cls, value: int | Length) -> str:
        """Return the `twips` value of `value`, taken as EMU, as text."""
        return str(Emu(value).twips)
class ST_String(XsdString):
    """String simple-type; behaves exactly like `XsdString`."""
class ST_TblLayoutType(XsdString):
    """String simple-type accepting "fixed" or "autofit"."""

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise |TypeError| for a non-string, |ValueError| for any other string."""
        cls.validate_string(value)
        allowed = ("fixed", "autofit")
        if value in allowed:
            return
        raise ValueError("must be one of %s, got '%s'" % (allowed, value))
class ST_TblWidth(XsdString):
    """String simple-type accepting "auto", "dxa", "nil" or "pct"."""

    @classmethod
    def validate(cls, value: Any) -> None:
        """Raise |TypeError| for a non-string, |ValueError| for any other string."""
        cls.validate_string(value)
        allowed = ("auto", "dxa", "nil", "pct")
        if value in allowed:
            return
        raise ValueError("must be one of %s, got '%s'" % (allowed, value))
class ST_TwipsMeasure(XsdUnsignedLong):
    """Unsigned length simple-type stored in XML as twips."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> Length:
        """Return the length for `str_value`.

        Text containing an "i", "m" or "p" is handed to `ST_UniversalMeasure`;
        anything else is parsed as an integer count of twips.
        """
        has_unit_letter = any(letter in str_value for letter in ("i", "m", "p"))
        if has_unit_letter:
            return ST_UniversalMeasure.convert_from_xml(str_value)
        return Twips(int(str_value))

    @classmethod
    def convert_to_xml(cls, value: int | Length) -> str:
        """Return the `twips` value of `value`, taken as EMU, as text."""
        return str(Emu(value).twips)
class ST_UniversalMeasure(BaseSimpleType):
    """Length simple-type written as a number followed by a two-letter unit."""

    @classmethod
    def convert_from_xml(cls, str_value: str) -> Emu:
        """Return the |Emu| length for text such as "2.5cm".

        The last two characters select the unit, one of "mm", "cm", "in", "pt",
        "pc" or "pi"; everything before them is parsed as a float. The product is
        rounded to a whole number of EMU.
        """
        number_text = str_value[:-2]
        unit = str_value[-2:]
        # -- parse the number before looking up the unit, so a malformed number is
        # -- reported ahead of an unknown unit --
        quantity = float(number_text)
        emu_per_unit = {
            "mm": 36000,
            "cm": 360000,
            "in": 914400,
            "pt": 12700,
            "pc": 152400,
            "pi": 152400,
        }
        return Emu(int(round(quantity * emu_per_unit[unit])))
class ST_VerticalAlignRun(XsdStringEnumeration):
    """Valid values for `w:vertAlign/@val`."""

    BASELINE = "baseline"
    SUPERSCRIPT = "superscript"
    SUBSCRIPT = "subscript"

    # -- the three values accepted by the inherited `validate()` --
    _members = (BASELINE, SUPERSCRIPT, SUBSCRIPT)

View File

@@ -0,0 +1,320 @@
"""Custom element classes related to the styles part."""
from __future__ import annotations
from docx.enum.style import WD_STYLE_TYPE
from docx.oxml.simpletypes import ST_DecimalNumber, ST_OnOff, ST_String
from docx.oxml.xmlchemy import (
BaseOxmlElement,
OptionalAttribute,
RequiredAttribute,
ZeroOrMore,
ZeroOrOne,
)
def styleId_from_name(name):
    """Return the style id corresponding to `name`.

    The lowercase names "caption" and "heading 1" through "heading 9" map to
    "Caption" and "Heading1" through "Heading9". Every other name has its spaces
    removed and is otherwise returned unchanged, so "Heading 1" also gives
    "Heading1".
    """
    # -- computed up front, as the fallback for any name without a special case --
    without_spaces = name.replace(" ", "")
    special_case_ids = {"caption": "Caption"}
    special_case_ids.update(
        {"heading %d" % level: "Heading%d" % level for level in range(1, 10)}
    )
    return special_case_ids.get(name, without_spaces)
class CT_LatentStyles(BaseOxmlElement):
    """`w:latentStyles` element, defining behavior defaults for latent styles and
    containing `w:lsdException` child elements that each override those defaults for a
    named latent style."""

    lsdException = ZeroOrMore("w:lsdException", successors=())

    count = OptionalAttribute("w:count", ST_DecimalNumber)
    defLockedState = OptionalAttribute("w:defLockedState", ST_OnOff)
    defQFormat = OptionalAttribute("w:defQFormat", ST_OnOff)
    defSemiHidden = OptionalAttribute("w:defSemiHidden", ST_OnOff)
    defUIPriority = OptionalAttribute("w:defUIPriority", ST_DecimalNumber)
    defUnhideWhenUsed = OptionalAttribute("w:defUnhideWhenUsed", ST_OnOff)

    def bool_prop(self, attr_name):
        """Return the boolean value of the attribute having `attr_name`, or |False| if
        not present."""
        value = getattr(self, attr_name)
        if value is None:
            return False
        return value

    def get_by_name(self, name):
        """Return the `w:lsdException` child having `name`, or |None| if not found.

        `name` may contain single and/or double quote characters.
        """
        found = self.xpath("w:lsdException[@w:name=%s]" % self._xpath_literal(name))
        if not found:
            return None
        return found[0]

    def set_bool_prop(self, attr_name, value):
        """Set the on/off attribute having `attr_name` to `value`."""
        setattr(self, attr_name, bool(value))

    @staticmethod
    def _xpath_literal(value):
        """Return `value` rendered as an XPath 1.0 string expression.

        XPath 1.0 string literals have no escape syntax, so text containing both
        quote characters is assembled with `concat()`. Text without a double-quote
        produces the plain double-quoted literal.
        """
        text = str(value)
        if '"' not in text:
            return '"%s"' % text
        if "'" not in text:
            return "'%s'" % text
        # -- double-quote each piece, joining pieces with a single-quoted `"` --
        pieces = ('"%s"' % piece for piece in text.split('"'))
        return "concat(%s)" % ", '\"', ".join(pieces)
class CT_LsdException(BaseOxmlElement):
    """``<w:lsdException>`` element, defining override visibility behaviors for a named
    latent style."""

    # -- `w:name` is the only required attribute; the others read as |None| when
    # -- absent, per the `on_off_prop()` contract below --
    locked = OptionalAttribute("w:locked", ST_OnOff)
    name = RequiredAttribute("w:name", ST_String)
    qFormat = OptionalAttribute("w:qFormat", ST_OnOff)
    semiHidden = OptionalAttribute("w:semiHidden", ST_OnOff)
    uiPriority = OptionalAttribute("w:uiPriority", ST_DecimalNumber)
    unhideWhenUsed = OptionalAttribute("w:unhideWhenUsed", ST_OnOff)

    def delete(self):
        """Remove this `w:lsdException` element from the XML document."""
        self.getparent().remove(self)

    def on_off_prop(self, attr_name):
        """Return the boolean value of the attribute having `attr_name`, or |None| if
        not present."""
        return getattr(self, attr_name)

    def set_on_off_prop(self, attr_name, value):
        """Set the on/off attribute having `attr_name` to `value`."""
        setattr(self, attr_name, value)
class CT_Style(BaseOxmlElement):
    """A ``<w:style>`` element, representing a style definition.

    Each `*_val` property reads and writes the `val` attribute of the child element
    it is named for.
    """

    # -- child-element tag sequence; each declaration below passes the tail of this
    # -- sequence as the successors of its element --
    _tag_seq = (
        "w:name",
        "w:aliases",
        "w:basedOn",
        "w:next",
        "w:link",
        "w:autoRedefine",
        "w:hidden",
        "w:uiPriority",
        "w:semiHidden",
        "w:unhideWhenUsed",
        "w:qFormat",
        "w:locked",
        "w:personal",
        "w:personalCompose",
        "w:personalReply",
        "w:rsid",
        "w:pPr",
        "w:rPr",
        "w:tblPr",
        "w:trPr",
        "w:tcPr",
        "w:tblStylePr",
    )
    name = ZeroOrOne("w:name", successors=_tag_seq[1:])
    basedOn = ZeroOrOne("w:basedOn", successors=_tag_seq[3:])
    next = ZeroOrOne("w:next", successors=_tag_seq[4:])
    uiPriority = ZeroOrOne("w:uiPriority", successors=_tag_seq[8:])
    semiHidden = ZeroOrOne("w:semiHidden", successors=_tag_seq[9:])
    unhideWhenUsed = ZeroOrOne("w:unhideWhenUsed", successors=_tag_seq[10:])
    qFormat = ZeroOrOne("w:qFormat", successors=_tag_seq[11:])
    locked = ZeroOrOne("w:locked", successors=_tag_seq[12:])
    pPr = ZeroOrOne("w:pPr", successors=_tag_seq[17:])
    rPr = ZeroOrOne("w:rPr", successors=_tag_seq[18:])
    del _tag_seq

    type: WD_STYLE_TYPE | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:type", WD_STYLE_TYPE
    )
    styleId: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:styleId", ST_String
    )
    default = OptionalAttribute("w:default", ST_OnOff)
    customStyle = OptionalAttribute("w:customStyle", ST_OnOff)

    @property
    def basedOn_val(self):
        """Value of `w:basedOn/@w:val` or |None| if not present."""
        based_on = self.basedOn
        return None if based_on is None else based_on.val

    @basedOn_val.setter
    def basedOn_val(self, value):
        # -- assigning |None| removes the `w:basedOn` child altogether --
        if value is not None:
            self.get_or_add_basedOn().val = value
            return
        self._remove_basedOn()

    @property
    def base_style(self):
        """Sibling CT_Style element this style is based on or |None| if no base style or
        base style not found."""
        based_on = self.basedOn
        if based_on is None:
            return None
        # -- the lookup itself produces |None| when no style has that id --
        return self.getparent().get_by_id(based_on.val)

    def delete(self):
        """Remove this `w:style` element from its parent `w:styles` element."""
        parent = self.getparent()
        parent.remove(self)

    @property
    def locked_val(self):
        """Value of `w:locked/@w:val` or |False| if not present."""
        locked_elm = self.locked
        return False if locked_elm is None else locked_elm.val

    @locked_val.setter
    def locked_val(self, value):
        # -- a falsy value is represented by the absence of the `w:locked` child --
        self._remove_locked()
        if not value:
            return
        self._add_locked().val = value

    @property
    def name_val(self):
        """Value of ``<w:name>`` child or |None| if not present."""
        name_elm = self.name
        return None if name_elm is None else name_elm.val

    @name_val.setter
    def name_val(self, value):
        self._remove_name()
        if value is None:
            return
        self._add_name().val = value

    @property
    def next_style(self):
        """Sibling CT_Style element identified by the value of `w:next/@w:val` or |None|
        if no value is present or no style with that style id is found."""
        next_elm = self.next
        if next_elm is None:
            return None
        return self.getparent().get_by_id(next_elm.val)  # None if not found

    @property
    def qFormat_val(self):
        """Value of `w:qFormat/@w:val` or |False| if not present."""
        q_format = self.qFormat
        return False if q_format is None else q_format.val

    @qFormat_val.setter
    def qFormat_val(self, value):
        # -- only presence is written; the new child's `val` is left unset --
        self._remove_qFormat()
        if not value:
            return
        self._add_qFormat()

    @property
    def semiHidden_val(self):
        """Value of ``<w:semiHidden>`` child or |False| if not present."""
        semi_hidden = self.semiHidden
        return False if semi_hidden is None else semi_hidden.val

    @semiHidden_val.setter
    def semiHidden_val(self, value):
        self._remove_semiHidden()
        if not value:
            return
        self._add_semiHidden().val = value

    @property
    def uiPriority_val(self):
        """Value of ``<w:uiPriority>`` child or |None| if not present."""
        ui_priority = self.uiPriority
        return None if ui_priority is None else ui_priority.val

    @uiPriority_val.setter
    def uiPriority_val(self, value):
        self._remove_uiPriority()
        if value is None:
            return
        self._add_uiPriority().val = value

    @property
    def unhideWhenUsed_val(self):
        """Value of `w:unhideWhenUsed/@w:val` or |False| if not present."""
        unhide = self.unhideWhenUsed
        return False if unhide is None else unhide.val

    @unhideWhenUsed_val.setter
    def unhideWhenUsed_val(self, value):
        self._remove_unhideWhenUsed()
        if not value:
            return
        self._add_unhideWhenUsed().val = value
class CT_Styles(BaseOxmlElement):
    """``<w:styles>`` element, the root element of a styles part, i.e. styles.xml."""

    _tag_seq = ("w:docDefaults", "w:latentStyles", "w:style")
    latentStyles = ZeroOrOne("w:latentStyles", successors=_tag_seq[2:])
    style = ZeroOrMore("w:style", successors=())
    del _tag_seq

    def add_style_of_type(self, name, style_type, builtin):
        """Return a newly added `w:style` element having `name` and `style_type`.

        `w:style/@customStyle` is set based on the value of `builtin`. The style id
        is derived from `name` by `styleId_from_name()`.
        """
        style = self.add_style()
        style.type = style_type
        style.customStyle = None if builtin else True
        style.styleId = styleId_from_name(name)
        style.name_val = name
        return style

    def default_for(self, style_type):
        """Return the last `w:style` child, in document order, having `style_type` and
        a true `default` value, or |None| if there is none."""
        default_styles_for_type = [
            s for s in self._iter_styles() if s.type == style_type and s.default
        ]
        if not default_styles_for_type:
            return None
        # spec calls for last default in document order
        return default_styles_for_type[-1]

    def get_by_id(self, styleId: str) -> CT_Style | None:
        """`w:style` child where @styleId = `styleId`.

        |None| if not found. `styleId` may contain quote characters.
        """
        xpath = "w:style[@w:styleId=%s]" % self._xpath_literal(styleId)
        return next(iter(self.xpath(xpath)), None)

    def get_by_name(self, name: str) -> CT_Style | None:
        """`w:style` child with `w:name` grandchild having value `name`.

        |None| if not found. `name` may contain quote characters.
        """
        xpath = "w:style[w:name/@w:val=%s]" % self._xpath_literal(name)
        return next(iter(self.xpath(xpath)), None)

    def _iter_styles(self):
        """Generate each of the `w:style` child elements in document order."""
        return (style for style in self.xpath("w:style"))

    @staticmethod
    def _xpath_literal(value):
        """Return `value` rendered as an XPath 1.0 string expression.

        XPath 1.0 string literals have no escape syntax, so text containing both
        quote characters is assembled with `concat()`. Text without a double-quote
        produces the plain double-quoted literal.
        """
        text = str(value)
        if '"' not in text:
            return '"%s"' % text
        if "'" not in text:
            return "'%s'" % text
        # -- double-quote each piece, joining pieces with a single-quoted `"` --
        pieces = ('"%s"' % piece for piece in text.split('"'))
        return "concat(%s)" % ", '\"', ".join(pieces)

View File

@@ -0,0 +1,977 @@
"""Custom element classes for tables."""
from __future__ import annotations
from typing import TYPE_CHECKING, Callable, cast
from docx.enum.table import WD_CELL_VERTICAL_ALIGNMENT, WD_ROW_HEIGHT_RULE, WD_TABLE_DIRECTION
from docx.exceptions import InvalidSpanError
from docx.oxml.ns import nsdecls, qn
from docx.oxml.parser import parse_xml
from docx.oxml.shared import CT_DecimalNumber
from docx.oxml.simpletypes import (
ST_Merge,
ST_TblLayoutType,
ST_TblWidth,
ST_TwipsMeasure,
XsdInt,
)
from docx.oxml.text.paragraph import CT_P
from docx.oxml.xmlchemy import (
BaseOxmlElement,
OneAndOnlyOne,
OneOrMore,
OptionalAttribute,
RequiredAttribute,
ZeroOrMore,
ZeroOrOne,
)
from docx.shared import Emu, Length, Twips
if TYPE_CHECKING:
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.shared import CT_OnOff, CT_String
from docx.oxml.text.parfmt import CT_Jc
class CT_Height(BaseOxmlElement):
    """Used for `w:trHeight` to specify a row height and row height rule."""

    # -- optional `w:val` attribute, converted using `ST_TwipsMeasure` --
    val: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:val", ST_TwipsMeasure
    )
    # -- optional `w:hRule` attribute, a `WD_ROW_HEIGHT_RULE` member --
    hRule: WD_ROW_HEIGHT_RULE | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:hRule", WD_ROW_HEIGHT_RULE
    )
class CT_Row(BaseOxmlElement):
    """``<w:tr>`` element."""

    add_tc: Callable[[], CT_Tc]
    get_or_add_trPr: Callable[[], CT_TrPr]
    _add_trPr: Callable[[], CT_TrPr]

    tc_lst: list[CT_Tc]
    # -- custom inserter below --
    tblPrEx: CT_TblPrEx | None = ZeroOrOne("w:tblPrEx")  # pyright: ignore[reportAssignmentType]
    # -- custom inserter below --
    trPr: CT_TrPr | None = ZeroOrOne("w:trPr")  # pyright: ignore[reportAssignmentType]
    tc = ZeroOrMore("w:tc")

    @property
    def grid_after(self) -> int:
        """The number of unpopulated layout-grid cells at the end of this row."""
        row_props = self.trPr
        return 0 if row_props is None else row_props.grid_after

    @property
    def grid_before(self) -> int:
        """The number of unpopulated layout-grid cells at the start of this row."""
        row_props = self.trPr
        return 0 if row_props is None else row_props.grid_before

    def tc_at_grid_offset(self, grid_offset: int) -> CT_Tc:
        """The `tc` element in this tr at exact `grid offset`.

        Raises ValueError when this `w:tr` contains no `w:tc` with exact starting `grid_offset`.
        """
        # -- grid cells still to cross; omitted cells at the row start count as crossed --
        cells_to_go = grid_offset - self.grid_before
        for cell in self.tc_lst:
            # -- Landed exactly on the start of this cell, it's the one requested. --
            if cells_to_go == 0:
                return cell
            # -- Overshot, so `grid_offset` falls inside a span; stop searching. --
            if cells_to_go < 0:
                break
            # -- Not there yet, step over the grid cells this cell occupies. --
            cells_to_go -= cell.grid_span
        raise ValueError(f"no `tc` element at grid_offset={grid_offset}")

    @property
    def tr_idx(self) -> int:
        """Index of this `w:tr` element within its parent `w:tbl` element."""
        parent_tbl = cast(CT_Tbl, self.getparent())
        return parent_tbl.tr_lst.index(self)

    @property
    def trHeight_hRule(self) -> WD_ROW_HEIGHT_RULE | None:
        """The value of `./w:trPr/w:trHeight/@w:hRule`, or |None| if not present."""
        row_props = self.trPr
        return None if row_props is None else row_props.trHeight_hRule

    @trHeight_hRule.setter
    def trHeight_hRule(self, value: WD_ROW_HEIGHT_RULE | None):
        self.get_or_add_trPr().trHeight_hRule = value

    @property
    def trHeight_val(self):
        """Return the value of `w:trPr/w:trHeight@w:val`, or |None| if not present."""
        row_props = self.trPr
        return None if row_props is None else row_props.trHeight_val

    @trHeight_val.setter
    def trHeight_val(self, value: Length | None):
        self.get_or_add_trPr().trHeight_val = value

    def _insert_tblPrEx(self, tblPrEx: CT_TblPrEx):
        """Insert `tblPrEx` as the first child of this row."""
        self.insert(0, tblPrEx)

    def _insert_trPr(self, trPr: CT_TrPr):
        """Insert `trPr` right after the `w:tblPrEx` child when there is one, otherwise
        as the first child."""
        existing_tblPrEx = self.tblPrEx
        if existing_tblPrEx is None:
            self.insert(0, trPr)
            return
        existing_tblPrEx.addnext(trPr)

    def _new_tc(self):
        """Return the element produced by `CT_Tc.new()`."""
        return CT_Tc.new()
class CT_Tbl(BaseOxmlElement):
    """``<w:tbl>`` element."""

    add_tr: Callable[[], CT_Row]
    tr_lst: list[CT_Row]

    tblPr: CT_TblPr = OneAndOnlyOne("w:tblPr")  # pyright: ignore[reportAssignmentType]
    tblGrid: CT_TblGrid = OneAndOnlyOne("w:tblGrid")  # pyright: ignore[reportAssignmentType]
    tr = ZeroOrMore("w:tr")

    @property
    def bidiVisual_val(self) -> bool | None:
        """Value of `./w:tblPr/w:bidiVisual/@w:val` or |None| if not present.

        Controls whether table cells are displayed right-to-left or left-to-right.
        """
        bidiVisual = self.tblPr.bidiVisual
        if bidiVisual is None:
            return None
        return bidiVisual.val

    @bidiVisual_val.setter
    def bidiVisual_val(self, value: WD_TABLE_DIRECTION | None):
        tblPr = self.tblPr
        if value is None:
            # -- |None| removes the `w:bidiVisual` child --
            tblPr._remove_bidiVisual()  # pyright: ignore[reportPrivateUsage]
        else:
            # -- any other value is stored as its truthiness --
            tblPr.get_or_add_bidiVisual().val = bool(value)

    @property
    def col_count(self):
        """The number of grid columns in this table."""
        return len(self.tblGrid.gridCol_lst)

    def iter_tcs(self):
        """Generate each of the `w:tc` elements in this table, left to right and top to
        bottom.

        Each cell in the first row is generated, followed by each cell in the second
        row, etc.
        """
        for tr in self.tr_lst:
            for tc in tr.tc_lst:
                yield tc

    @classmethod
    def new_tbl(cls, rows: int, cols: int, width: Length) -> CT_Tbl:
        """Return a new `w:tbl` element having `rows` rows and `cols` columns.

        `width` is distributed evenly between the columns.
        """
        return cast(CT_Tbl, parse_xml(cls._tbl_xml(rows, cols, width)))

    @property
    def tblStyle_val(self) -> str | None:
        """`w:tblPr/w:tblStyle/@w:val` (a table style id) or |None| if not present."""
        tblStyle = self.tblPr.tblStyle
        if tblStyle is None:
            return None
        return tblStyle.val

    @tblStyle_val.setter
    def tblStyle_val(self, styleId: str | None) -> None:
        """Set the value of `w:tblPr/w:tblStyle/@w:val` (a table style id) to `styleId`.

        If `styleId` is None, remove the `w:tblStyle` element.
        """
        tblPr = self.tblPr
        # -- always start from a clean slate, then re-add only when there is a value --
        tblPr._remove_tblStyle()  # pyright: ignore[reportPrivateUsage]
        if styleId is None:
            return
        tblPr._add_tblStyle().val = styleId  # pyright: ignore[reportPrivateUsage]

    @classmethod
    def _tbl_xml(cls, rows: int, cols: int, width: Length) -> str:
        """Return the XML text for a new table of `rows` x `cols` cells.

        Each column gets `width // cols`; a table with no columns uses a column
        width of zero, which also avoids dividing by zero.
        """
        col_width = Emu(width // cols) if cols > 0 else Emu(0)
        return (
            f"<w:tbl {nsdecls('w')}>\n"
            f" <w:tblPr>\n"
            f' <w:tblW w:type="auto" w:w="0"/>\n'
            f' <w:tblLook w:firstColumn="1" w:firstRow="1"\n'
            f' w:lastColumn="0" w:lastRow="0" w:noHBand="0"\n'
            f' w:noVBand="1" w:val="04A0"/>\n'
            f" </w:tblPr>\n"
            f"{cls._tblGrid_xml(cols, col_width)}"
            f"{cls._trs_xml(rows, cols, col_width)}"
            f"</w:tbl>\n"
        )

    @classmethod
    def _tblGrid_xml(cls, col_count: int, col_width: Length) -> str:
        """Return the `w:tblGrid` XML text, with one `w:gridCol` per column whose
        `w:w` is `col_width` in twips."""
        xml = " <w:tblGrid>\n"
        for _ in range(col_count):
            xml += ' <w:gridCol w:w="%d"/>\n' % col_width.twips
        xml += " </w:tblGrid>\n"
        return xml

    @classmethod
    def _trs_xml(cls, row_count: int, col_count: int, col_width: Length) -> str:
        """Return the XML text for `row_count` identical `w:tr` elements."""
        return f" <w:tr>\n{cls._tcs_xml(col_count, col_width)} </w:tr>\n" * row_count

    @classmethod
    def _tcs_xml(cls, col_count: int, col_width: Length) -> str:
        """Return the XML text for `col_count` identical `w:tc` elements, each with a
        `dxa` width of `col_width` in twips and one empty paragraph."""
        return (
            f" <w:tc>\n"
            f" <w:tcPr>\n"
            f' <w:tcW w:type="dxa" w:w="{col_width.twips}"/>\n'
            f" </w:tcPr>\n"
            f" <w:p/>\n"
            f" </w:tc>\n"
        ) * col_count
class CT_TblGrid(BaseOxmlElement):
    """`w:tblGrid` element.

    Child of `w:tbl`, holds `w:gridCol` elements that define column count, width, etc.
    """

    add_gridCol: Callable[[], CT_TblGridCol]
    gridCol_lst: list[CT_TblGridCol]

    # -- `w:tblGridChange` is the only element declared to follow the `w:gridCol` children --
    gridCol = ZeroOrMore("w:gridCol", successors=("w:tblGridChange",))
class CT_TblGridCol(BaseOxmlElement):
    """`w:gridCol` element, child of `w:tblGrid`, defines a table column."""

    # -- optional `w:w` attribute, converted using `ST_TwipsMeasure` --
    w: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:w", ST_TwipsMeasure
    )

    @property
    def gridCol_idx(self) -> int:
        """Index of this `w:gridCol` element within its parent `w:tblGrid` element."""
        tblGrid = cast(CT_TblGrid, self.getparent())
        return tblGrid.gridCol_lst.index(self)
class CT_TblLayoutType(BaseOxmlElement):
    """`w:tblLayout` element.

    Specifies whether column widths are fixed or can be automatically adjusted based on
    content.
    """

    # -- optional `w:type` attribute, validated by `ST_TblLayoutType` --
    type: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:type", ST_TblLayoutType
    )
class CT_TblPr(BaseOxmlElement):
    """``<w:tblPr>`` element, child of ``<w:tbl>``, holds child elements that define
    table properties such as style and borders."""

    get_or_add_bidiVisual: Callable[[], CT_OnOff]
    get_or_add_jc: Callable[[], CT_Jc]
    get_or_add_tblLayout: Callable[[], CT_TblLayoutType]
    _add_tblStyle: Callable[[], CT_String]
    _remove_bidiVisual: Callable[[], None]
    _remove_jc: Callable[[], None]
    _remove_tblStyle: Callable[[], None]

    # -- child-element tag sequence; each declaration below passes the tail of this
    # -- sequence as the successors of its element --
    _tag_seq = (
        "w:tblStyle",
        "w:tblpPr",
        "w:tblOverlap",
        "w:bidiVisual",
        "w:tblStyleRowBandSize",
        "w:tblStyleColBandSize",
        "w:tblW",
        "w:jc",
        "w:tblCellSpacing",
        "w:tblInd",
        "w:tblBorders",
        "w:shd",
        "w:tblLayout",
        "w:tblCellMar",
        "w:tblLook",
        "w:tblCaption",
        "w:tblDescription",
        "w:tblPrChange",
    )
    tblStyle: CT_String | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:tblStyle", successors=_tag_seq[1:]
    )
    bidiVisual: CT_OnOff | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:bidiVisual", successors=_tag_seq[4:]
    )
    jc: CT_Jc | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:jc", successors=_tag_seq[8:]
    )
    tblLayout: CT_TblLayoutType | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:tblLayout", successors=_tag_seq[13:]
    )
    del _tag_seq

    @property
    def alignment(self) -> WD_TABLE_ALIGNMENT | None:
        """Horizontal alignment of table, |None| if `./w:jc` is not present."""
        jc_elm = self.jc
        return None if jc_elm is None else cast("WD_TABLE_ALIGNMENT | None", jc_elm.val)

    @alignment.setter
    def alignment(self, value: WD_TABLE_ALIGNMENT | None):
        # -- drop any existing `w:jc`, then re-create it only for a real value --
        self._remove_jc()
        if value is not None:
            self.get_or_add_jc().val = cast("WD_ALIGN_PARAGRAPH", value)

    @property
    def autofit(self) -> bool:
        """|False| when there is a `w:tblLayout` child with `@w:type="fixed"`.

        Otherwise |True|.
        """
        layout = self.tblLayout
        return layout is None or layout.type != "fixed"

    @autofit.setter
    def autofit(self, value: bool):
        layout_type = "autofit" if value else "fixed"
        self.get_or_add_tblLayout().type = layout_type

    @property
    def style(self):
        """Return the value of the ``val`` attribute of the ``<w:tblStyle>`` child or
        |None| if not present."""
        style_elm = self.tblStyle
        return None if style_elm is None else style_elm.val

    @style.setter
    def style(self, value: str | None):
        # -- drop any existing `w:tblStyle`, then re-create it only for a real value --
        self._remove_tblStyle()
        if value is not None:
            self._add_tblStyle().val = value
class CT_TblPrEx(BaseOxmlElement):
    """`w:tblPrEx` element, exceptions to table-properties.

    Applied at a lower level, like a `w:tr` to modify the appearance. Possibly used when
    two tables are merged. For more see:
    http://officeopenxml.com/WPtablePropertyExceptions.php

    No child elements or attributes are declared on this class.
    """
class CT_TblWidth(BaseOxmlElement):
    """Used for `w:tblW` and `w:tcW` and others, specifies a table-related width."""

    # the type for `w` attr is actually ST_MeasurementOrPercent, but using
    # XsdInt for now because only dxa (twips) values are being used. It's not
    # entirely clear what the semantics are for other values like -01.4mm
    w: int = RequiredAttribute("w:w", XsdInt)  # pyright: ignore[reportAssignmentType]
    type = RequiredAttribute("w:type", ST_TblWidth)

    @property
    def width(self) -> Length | None:
        """EMU length indicated by the combined `w:w` and `w:type` attrs.

        |None| unless `w:type` is "dxa", in which case `w:w` is taken as twips.
        """
        return Twips(self.w) if self.type == "dxa" else None

    @width.setter
    def width(self, value: Length):
        # -- a width is always written as a "dxa" (twips) measure --
        twips_width = Emu(value).twips
        self.type = "dxa"
        self.w = twips_width
class CT_Tc(BaseOxmlElement):
    """`w:tc` table cell element."""

    add_p: Callable[[], CT_P]
    get_or_add_tcPr: Callable[[], CT_TcPr]
    p_lst: list[CT_P]
    tbl_lst: list[CT_Tbl]
    _insert_tbl: Callable[[CT_Tbl], CT_Tbl]
    _new_p: Callable[[], CT_P]

    # -- tcPr has many successors, `._insert_tcPr()` is overridden below --
    tcPr: CT_TcPr | None = ZeroOrOne("w:tcPr")  # pyright: ignore[reportAssignmentType]
    p = OneOrMore("w:p")
    tbl = OneOrMore("w:tbl")

    @property
    def bottom(self) -> int:
        """The row index that marks the bottom extent of the vertical span of this cell.

        This is one greater than the index of the bottom-most row of the span, similar
        to how a slice of the cell's rows would be specified.
        """
        if self.vMerge is not None:
            tc_below = self._tc_below
            # -- a "continue" cell directly below extends this span further down --
            if tc_below is not None and tc_below.vMerge == ST_Merge.CONTINUE:
                return tc_below.bottom
        return self._tr_idx + 1

    def clear_content(self):
        """Remove all content elements, preserving `w:tcPr` element if present.

        Note that this leaves the `w:tc` element in an invalid state because it doesn't
        contain at least one block-level element. It's up to the caller to add a
        `w:p` child element as the last content element.
        """
        # -- remove all cell inner-content except a `w:tcPr` when present. --
        for e in self.xpath("./*[not(self::w:tcPr)]"):
            self.remove(e)

    @property
    def grid_offset(self) -> int:
        """Starting offset of `tc` in the layout-grid columns of its table.

        A cell in the leftmost grid-column has offset 0.
        """
        grid_before = self._tr.grid_before
        preceding_tc_grid_spans = sum(
            tc.grid_span for tc in self.xpath("./preceding-sibling::w:tc")
        )
        return grid_before + preceding_tc_grid_spans

    @property
    def grid_span(self) -> int:
        """The integer number of columns this cell spans.

        Determined by ./w:tcPr/w:gridSpan/@val, it defaults to 1.
        """
        tcPr = self.tcPr
        return 1 if tcPr is None else tcPr.grid_span

    @grid_span.setter
    def grid_span(self, value: int):
        tcPr = self.get_or_add_tcPr()
        tcPr.grid_span = value

    @property
    def inner_content_elements(self) -> list[CT_P | CT_Tbl]:
        """All `w:p` and `w:tbl` child elements of this cell.

        Elements appear in document order. Elements shaded by nesting in a `w:ins` or
        other "wrapper" element will not be included.
        """
        return self.xpath("./w:p | ./w:tbl")

    def iter_block_items(self):
        """Generate a reference to each of the block-level content elements in this
        cell, in the order they appear."""
        block_item_tags = (qn("w:p"), qn("w:tbl"), qn("w:sdt"))
        for child in self:
            if child.tag in block_item_tags:
                yield child

    @property
    def left(self) -> int:
        """The grid column index at which this ``<w:tc>`` element appears."""
        return self.grid_offset

    def merge(self, other_tc: CT_Tc) -> CT_Tc:
        """Return top-left `w:tc` element of a new span.

        Span is formed by merging the rectangular region defined by using this tc
        element and `other_tc` as diagonal corners.
        """
        top, left, height, width = self._span_dimensions(other_tc)
        top_tc = self._tbl.tr_lst[top].tc_at_grid_offset(left)
        top_tc._grow_to(width, height)
        return top_tc

    @classmethod
    def new(cls) -> CT_Tc:
        """A new `w:tc` element, containing an empty paragraph as the required EG_BlockLevelElt."""
        return cast(CT_Tc, parse_xml("<w:tc %s><w:p/></w:tc>" % nsdecls("w")))

    @property
    def right(self) -> int:
        """The grid column index that marks the right-side extent of the horizontal span
        of this cell.

        This is one greater than the index of the right-most column of the span, similar
        to how a slice of the cell's columns would be specified.
        """
        return self.grid_offset + self.grid_span

    @property
    def top(self) -> int:
        """The top-most row index in the vertical span of this cell."""
        vMerge = self.vMerge
        if vMerge is None or vMerge == ST_Merge.RESTART:
            return self._tr_idx
        # -- a continuation cell: the span starts somewhere above this row --
        return self._tc_above.top

    @property
    def vMerge(self) -> str | None:
        """Value of ./w:tcPr/w:vMerge/@val, |None| if w:vMerge is not present."""
        tcPr = self.tcPr
        if tcPr is None:
            return None
        return tcPr.vMerge_val

    @vMerge.setter
    def vMerge(self, value: str | None):
        tcPr = self.get_or_add_tcPr()
        tcPr.vMerge_val = value

    @property
    def width(self) -> Length | None:
        """EMU length represented in `./w:tcPr/w:tcW` or |None| if not present."""
        tcPr = self.tcPr
        if tcPr is None:
            return None
        return tcPr.width

    @width.setter
    def width(self, value: Length):
        tcPr = self.get_or_add_tcPr()
        tcPr.width = value

    def _add_width_of(self, other_tc: CT_Tc):
        """Add the width of `other_tc` to this cell.

        Does nothing if either this tc or `other_tc` does not have a specified width.
        """
        self_width, other_width = self.width, other_tc.width
        # -- compare against None rather than relying on truthiness so an explicitly
        # -- specified width of zero still counts as "specified"
        if self_width is None or other_width is None:
            return
        self.width = Length(self_width + other_width)

    def _grow_to(self, width: int, height: int, top_tc: CT_Tc | None = None):
        """Grow this cell to `width` grid columns and `height` rows.

        This is accomplished by expanding horizontal spans and creating continuation
        cells to form vertical spans. `top_tc` is the top-left cell of the span being
        formed; it defaults to this cell on the initial (non-recursive) call.
        """

        def vMerge_val(top_tc: CT_Tc):
            # -- cells below the top cell continue the span; the top cell restarts
            # -- it, unless the span is only one row high and needs no vMerge at all
            return (
                ST_Merge.CONTINUE
                if top_tc is not self
                else None
                if height == 1
                else ST_Merge.RESTART
            )

        top_tc = self if top_tc is None else top_tc
        self._span_to_width(width, top_tc, vMerge_val(top_tc))
        if height > 1:
            tc_below = self._tc_below
            assert tc_below is not None
            tc_below._grow_to(width, height - 1, top_tc)

    def _insert_tcPr(self, tcPr: CT_TcPr) -> CT_TcPr:
        """Override default `._insert_tcPr()`."""
        # -- `tcPr`` has a large number of successors, but always comes first if it appears,
        # -- so just using insert(0, ...) rather than spelling out successors.
        self.insert(0, tcPr)
        return tcPr

    @property
    def _is_empty(self) -> bool:
        """True if this cell contains only a single empty `w:p` element."""
        block_items = list(self.iter_block_items())
        if len(block_items) > 1:
            return False
        # -- cell must include at least one block item but can be a `w:tbl`, `w:sdt`,
        # -- `w:customXml` or a `w:p`
        only_item = block_items[0]
        return isinstance(only_item, CT_P) and len(only_item.r_lst) == 0

    def _move_content_to(self, other_tc: CT_Tc):
        """Append the content of this cell to `other_tc`.

        Leaves this cell with a single empty ``<w:p>`` element.
        """
        if other_tc is self:
            return
        if self._is_empty:
            return
        other_tc._remove_trailing_empty_p()
        # -- appending moves each element from self to other_tc; take a snapshot first
        # -- so children are not re-parented while they are still being iterated
        for block_element in list(self.iter_block_items()):
            other_tc.append(block_element)
        # -- add back the required minimum single empty <w:p> element --
        self.append(self._new_p())

    def _new_tbl(self) -> None:
        raise NotImplementedError(
            "use CT_Tbl.new_tbl() to add a new table, specifying rows and columns"
        )

    @property
    def _next_tc(self) -> CT_Tc | None:
        """The `w:tc` element immediately following this one in this row, or |None| if
        this is the last `w:tc` element in the row."""
        following_tcs = self.xpath("./following-sibling::w:tc")
        return following_tcs[0] if following_tcs else None

    def _remove(self):
        """Remove this `w:tc` element from the XML tree."""
        parent_element = self.getparent()
        assert parent_element is not None
        parent_element.remove(self)

    def _remove_trailing_empty_p(self):
        """Remove last content element from this cell if it's an empty `w:p` element."""
        block_items = list(self.iter_block_items())
        last_content_elm = block_items[-1]
        if not isinstance(last_content_elm, CT_P):
            return
        p = last_content_elm
        if len(p.r_lst) > 0:
            return
        self.remove(p)

    def _span_dimensions(self, other_tc: CT_Tc) -> tuple[int, int, int, int]:
        """Return a (top, left, height, width) 4-tuple specifying the extents of the
        merged cell formed by using this tc and `other_tc` as opposite corner
        extents.

        Raises |InvalidSpanError| when the two cells do not describe a rectangle.
        """

        def raise_on_inverted_L(a: CT_Tc, b: CT_Tc):
            if a.top == b.top and a.bottom != b.bottom:
                raise InvalidSpanError("requested span not rectangular")
            if a.left == b.left and a.right != b.right:
                raise InvalidSpanError("requested span not rectangular")

        def raise_on_tee_shaped(a: CT_Tc, b: CT_Tc):
            top_most, other = (a, b) if a.top < b.top else (b, a)
            if top_most.top < other.top and top_most.bottom > other.bottom:
                raise InvalidSpanError("requested span not rectangular")

            left_most, other = (a, b) if a.left < b.left else (b, a)
            if left_most.left < other.left and left_most.right > other.right:
                raise InvalidSpanError("requested span not rectangular")

        raise_on_inverted_L(self, other_tc)
        raise_on_tee_shaped(self, other_tc)

        top = min(self.top, other_tc.top)
        left = min(self.left, other_tc.left)
        bottom = max(self.bottom, other_tc.bottom)
        right = max(self.right, other_tc.right)

        return top, left, bottom - top, right - left

    def _span_to_width(self, grid_width: int, top_tc: CT_Tc, vMerge: str | None):
        """Incorporate `w:tc` elements to the right until this cell spans `grid_width`.

        Incorporated `w:tc` elements are removed (replaced by gridSpan value).

        Raises |InvalidSpanError| if `grid_width` cannot be exactly achieved, such as
        when a merged cell would drive the span width greater than `grid_width` or if
        not enough grid columns are available to make this cell that wide. All content
        from incorporated cells is appended to `top_tc`. The val attribute of the vMerge
        element on the single remaining cell is set to `vMerge`. If `vMerge` is |None|,
        the vMerge element is removed if present.
        """
        self._move_content_to(top_tc)
        while self.grid_span < grid_width:
            self._swallow_next_tc(grid_width, top_tc)
        self.vMerge = vMerge

    def _swallow_next_tc(self, grid_width: int, top_tc: CT_Tc):
        """Extend the horizontal span of this `w:tc` element to incorporate the
        following `w:tc` element in the row and then delete that following `w:tc`
        element.

        Any content in the following `w:tc` element is appended to the content of
        `top_tc`. The width of the following `w:tc` element is added to this one, if
        present. Raises |InvalidSpanError| if the width of the resulting cell is greater
        than `grid_width` or if there is no next `<w:tc>` element in the row.
        """

        def raise_on_invalid_swallow(next_tc: CT_Tc | None):
            if next_tc is None:
                raise InvalidSpanError("not enough grid columns")
            if self.grid_span + next_tc.grid_span > grid_width:
                raise InvalidSpanError("span is not rectangular")

        next_tc = self._next_tc
        raise_on_invalid_swallow(next_tc)
        assert next_tc is not None
        next_tc._move_content_to(top_tc)
        self._add_width_of(next_tc)
        self.grid_span += next_tc.grid_span
        next_tc._remove()

    @property
    def _tbl(self) -> CT_Tbl:
        """The tbl element this tc element appears in."""
        return cast(CT_Tbl, self.xpath("./ancestor::w:tbl[position()=1]")[0])

    @property
    def _tc_above(self) -> CT_Tc:
        """The `w:tc` element immediately above this one in its grid column."""
        return self._tr_above.tc_at_grid_offset(self.grid_offset)

    @property
    def _tc_below(self) -> CT_Tc | None:
        """The tc element immediately below this one in its grid column."""
        tr_below = self._tr_below
        if tr_below is None:
            return None
        return tr_below.tc_at_grid_offset(self.grid_offset)

    @property
    def _tr(self) -> CT_Row:
        """The tr element this tc element appears in."""
        return cast(CT_Row, self.xpath("./ancestor::w:tr[position()=1]")[0])

    @property
    def _tr_above(self) -> CT_Row:
        """The tr element prior in sequence to the tr this cell appears in.

        Raises |ValueError| if called on a cell in the top-most row.
        """
        tr_aboves = self.xpath("./ancestor::w:tr[position()=1]/preceding-sibling::w:tr[1]")
        if not tr_aboves:
            raise ValueError("no tr above topmost tr in w:tbl")
        return tr_aboves[0]

    @property
    def _tr_below(self) -> CT_Row | None:
        """The tr element next in sequence after the tr this cell appears in, or |None|
        if this cell appears in the last row."""
        tr_lst = self._tbl.tr_lst
        tr_idx = tr_lst.index(self._tr)
        try:
            return tr_lst[tr_idx + 1]
        except IndexError:
            return None

    @property
    def _tr_idx(self) -> int:
        """The row index of the tr element this tc element appears in."""
        return self._tbl.tr_lst.index(self._tr)
class CT_TcPr(BaseOxmlElement):
    """``<w:tcPr>`` element, holding the properties of a table cell."""

    get_or_add_gridSpan: Callable[[], CT_DecimalNumber]
    get_or_add_tcW: Callable[[], CT_TblWidth]
    get_or_add_vAlign: Callable[[], CT_VerticalJc]
    _add_vMerge: Callable[[], CT_VMerge]
    _remove_gridSpan: Callable[[], None]
    _remove_vAlign: Callable[[], None]
    _remove_vMerge: Callable[[], None]

    # -- child-element sequence; each declaration below passes the slice of tags
    # -- that must follow its element as `successors`
    _tag_seq = (
        "w:cnfStyle",
        "w:tcW",
        "w:gridSpan",
        "w:hMerge",
        "w:vMerge",
        "w:tcBorders",
        "w:shd",
        "w:noWrap",
        "w:tcMar",
        "w:textDirection",
        "w:tcFitText",
        "w:vAlign",
        "w:hideMark",
        "w:headers",
        "w:cellIns",
        "w:cellDel",
        "w:cellMerge",
        "w:tcPrChange",
    )
    tcW: CT_TblWidth | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:tcW", successors=_tag_seq[2:]
    )
    gridSpan: CT_DecimalNumber | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:gridSpan", successors=_tag_seq[3:]
    )
    vMerge: CT_VMerge | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:vMerge", successors=_tag_seq[5:]
    )
    vAlign: CT_VerticalJc | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:vAlign", successors=_tag_seq[12:]
    )
    del _tag_seq

    @property
    def grid_span(self) -> int:
        """Number of grid columns spanned by this cell.

        Read from ./w:gridSpan/@val; 1 when there is no `w:gridSpan` child.
        """
        gridSpan = self.gridSpan
        if gridSpan is None:
            return 1
        return gridSpan.val

    @grid_span.setter
    def grid_span(self, value: int):
        # -- the child is always removed first and only re-added for spans wider
        # -- than one column
        self._remove_gridSpan()
        if value > 1:
            self.get_or_add_gridSpan().val = value

    @property
    def vAlign_val(self):
        """Value of the `w:val` attribute on the `w:vAlign` child.

        |None| when there is no `w:vAlign` child. The `w:val` attribute on
        `w:vAlign` is required.
        """
        vAlign = self.vAlign
        return None if vAlign is None else vAlign.val

    @vAlign_val.setter
    def vAlign_val(self, value: WD_CELL_VERTICAL_ALIGNMENT | None):
        if value is not None:
            self.get_or_add_vAlign().val = value
        else:
            self._remove_vAlign()

    @property
    def vMerge_val(self):
        """Value of ./w:vMerge/@val, or |None| when there is no `w:vMerge` child."""
        vMerge = self.vMerge
        return None if vMerge is None else vMerge.val

    @vMerge_val.setter
    def vMerge_val(self, value: str | None):
        # -- remove first, then add a fresh element only when a value was given --
        self._remove_vMerge()
        if value is None:
            return
        self._add_vMerge().val = value

    @property
    def width(self) -> Length | None:
        """EMU length in `./w:tcW` or |None| if not present or its type is not 'dxa'."""
        tcW = self.tcW
        return None if tcW is None else tcW.width

    @width.setter
    def width(self, value: Length):
        self.get_or_add_tcW().width = value
class CT_TrPr(BaseOxmlElement):
    """``<w:trPr>`` element, holding the properties of a table row."""

    get_or_add_trHeight: Callable[[], CT_Height]

    # -- child-element sequence; each declaration below passes the slice of tags
    # -- that must follow its element as `successors`
    _tag_seq = (
        "w:cnfStyle",
        "w:divId",
        "w:gridBefore",
        "w:gridAfter",
        "w:wBefore",
        "w:wAfter",
        "w:cantSplit",
        "w:trHeight",
        "w:tblHeader",
        "w:tblCellSpacing",
        "w:jc",
        "w:hidden",
        "w:ins",
        "w:del",
        "w:trPrChange",
    )
    gridAfter: CT_DecimalNumber | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:gridAfter", successors=_tag_seq[4:]
    )
    gridBefore: CT_DecimalNumber | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:gridBefore", successors=_tag_seq[3:]
    )
    trHeight: CT_Height | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:trHeight", successors=_tag_seq[8:]
    )
    del _tag_seq

    @property
    def grid_after(self) -> int:
        """The number of unpopulated layout-grid cells at the end of this row."""
        gridAfter = self.gridAfter
        if gridAfter is None:
            return 0
        return gridAfter.val

    @property
    def grid_before(self) -> int:
        """The number of unpopulated layout-grid cells at the start of this row."""
        gridBefore = self.gridBefore
        if gridBefore is None:
            return 0
        return gridBefore.val

    @property
    def trHeight_hRule(self) -> WD_ROW_HEIGHT_RULE | None:
        """Value of `w:trHeight@w:hRule`, or |None| when `w:trHeight` is not present."""
        trHeight = self.trHeight
        if trHeight is None:
            return None
        return trHeight.hRule

    @trHeight_hRule.setter
    def trHeight_hRule(self, value: WD_ROW_HEIGHT_RULE | None):
        # -- assigning None when there is no `w:trHeight` needs no element at all --
        if value is None and self.trHeight is None:
            return
        self.get_or_add_trHeight().hRule = value

    @property
    def trHeight_val(self):
        """Value of `w:trHeight@w:val`, or |None| when `w:trHeight` is not present."""
        trHeight = self.trHeight
        if trHeight is None:
            return None
        return trHeight.val

    @trHeight_val.setter
    def trHeight_val(self, value: Length | None):
        # -- assigning None when there is no `w:trHeight` needs no element at all --
        if value is None and self.trHeight is None:
            return
        self.get_or_add_trHeight().val = value
class CT_VerticalJc(BaseOxmlElement):
    """`w:vAlign` element, giving the vertical alignment of a cell."""

    # -- `w:val` is a required attribute on this element --
    val: WD_CELL_VERTICAL_ALIGNMENT = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
        "w:val",
        WD_CELL_VERTICAL_ALIGNMENT,
    )
class CT_VMerge(BaseOxmlElement):
    """``<w:vMerge>`` element, describing how a cell takes part in a vertical merge."""

    # -- `w:val` is optional; `ST_Merge.CONTINUE` is the declared default --
    val: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:val",
        ST_Merge,
        default=ST_Merge.CONTINUE,
    )

View File

@@ -0,0 +1,331 @@
# pyright: reportAssignmentType=false
"""Custom element classes related to run properties (font)."""
from __future__ import annotations
from typing import TYPE_CHECKING, Callable
from docx.enum.dml import MSO_THEME_COLOR
from docx.enum.text import WD_COLOR_INDEX, WD_UNDERLINE
from docx.oxml.ns import nsdecls
from docx.oxml.parser import parse_xml
from docx.oxml.simpletypes import (
ST_HexColor,
ST_HpsMeasure,
ST_String,
ST_VerticalAlignRun,
)
from docx.oxml.xmlchemy import (
BaseOxmlElement,
OptionalAttribute,
RequiredAttribute,
ZeroOrOne,
)
from docx.shared import RGBColor
if TYPE_CHECKING:
from docx.oxml.shared import CT_OnOff, CT_String
from docx.shared import Length
class CT_Color(BaseOxmlElement):
    """`w:color` element, giving the color of a font and perhaps other objects."""

    # -- required hex color value --
    val: RGBColor | str = RequiredAttribute("w:val", ST_HexColor)
    # -- optional theme-color reference --
    themeColor: MSO_THEME_COLOR | None = OptionalAttribute(
        "w:themeColor", MSO_THEME_COLOR
    )
class CT_Fonts(BaseOxmlElement):
    """`<w:rFonts>` element.

    Gives the typeface name for the various language types.
    """

    # -- both typeface attributes are optional --
    ascii: str | None = OptionalAttribute("w:ascii", ST_String)
    hAnsi: str | None = OptionalAttribute("w:hAnsi", ST_String)
class CT_Highlight(BaseOxmlElement):
    """`w:highlight` element, giving the highlighting/background color of a font."""

    # -- `w:val` is a required attribute on this element --
    val: WD_COLOR_INDEX = RequiredAttribute("w:val", WD_COLOR_INDEX)
class CT_HpsMeasure(BaseOxmlElement):
    """Element type for `<w:sz>` and others, giving a font size in half-points."""

    # -- `w:val` is a required attribute on this element --
    val: Length = RequiredAttribute("w:val", ST_HpsMeasure)
class CT_RPr(BaseOxmlElement):
    """`<w:rPr>` element, containing the properties for a run."""

    get_or_add_color: Callable[[], CT_Color]
    get_or_add_highlight: Callable[[], CT_Highlight]
    get_or_add_rFonts: Callable[[], CT_Fonts]
    get_or_add_sz: Callable[[], CT_HpsMeasure]
    get_or_add_vertAlign: Callable[[], CT_VerticalAlignRun]
    _add_rStyle: Callable[..., CT_String]
    _add_u: Callable[[], CT_Underline]
    _remove_color: Callable[[], None]
    _remove_highlight: Callable[[], None]
    _remove_rFonts: Callable[[], None]
    _remove_rStyle: Callable[[], None]
    _remove_sz: Callable[[], None]
    _remove_u: Callable[[], None]
    _remove_vertAlign: Callable[[], None]

    # -- child-element sequence; each declaration below passes the slice of tags
    # -- that must follow its element as `successors`
    _tag_seq = (
        "w:rStyle",
        "w:rFonts",
        "w:b",
        "w:bCs",
        "w:i",
        "w:iCs",
        "w:caps",
        "w:smallCaps",
        "w:strike",
        "w:dstrike",
        "w:outline",
        "w:shadow",
        "w:emboss",
        "w:imprint",
        "w:noProof",
        "w:snapToGrid",
        "w:vanish",
        "w:webHidden",
        "w:color",
        "w:spacing",
        "w:w",
        "w:kern",
        "w:position",
        "w:sz",
        "w:szCs",
        "w:highlight",
        "w:u",
        "w:effect",
        "w:bdr",
        "w:shd",
        "w:fitText",
        "w:vertAlign",
        "w:rtl",
        "w:cs",
        "w:em",
        "w:lang",
        "w:eastAsianLayout",
        "w:specVanish",
        "w:oMath",
    )
    rStyle: CT_String | None = ZeroOrOne("w:rStyle", successors=_tag_seq[1:])
    rFonts: CT_Fonts | None = ZeroOrOne("w:rFonts", successors=_tag_seq[2:])
    b: CT_OnOff | None = ZeroOrOne("w:b", successors=_tag_seq[3:])
    bCs = ZeroOrOne("w:bCs", successors=_tag_seq[4:])
    i = ZeroOrOne("w:i", successors=_tag_seq[5:])
    iCs = ZeroOrOne("w:iCs", successors=_tag_seq[6:])
    caps = ZeroOrOne("w:caps", successors=_tag_seq[7:])
    smallCaps = ZeroOrOne("w:smallCaps", successors=_tag_seq[8:])
    strike = ZeroOrOne("w:strike", successors=_tag_seq[9:])
    dstrike = ZeroOrOne("w:dstrike", successors=_tag_seq[10:])
    outline = ZeroOrOne("w:outline", successors=_tag_seq[11:])
    shadow = ZeroOrOne("w:shadow", successors=_tag_seq[12:])
    emboss = ZeroOrOne("w:emboss", successors=_tag_seq[13:])
    imprint = ZeroOrOne("w:imprint", successors=_tag_seq[14:])
    noProof = ZeroOrOne("w:noProof", successors=_tag_seq[15:])
    snapToGrid = ZeroOrOne("w:snapToGrid", successors=_tag_seq[16:])
    vanish = ZeroOrOne("w:vanish", successors=_tag_seq[17:])
    webHidden = ZeroOrOne("w:webHidden", successors=_tag_seq[18:])
    color: CT_Color | None = ZeroOrOne("w:color", successors=_tag_seq[19:])
    sz: CT_HpsMeasure | None = ZeroOrOne("w:sz", successors=_tag_seq[24:])
    highlight: CT_Highlight | None = ZeroOrOne("w:highlight", successors=_tag_seq[26:])
    u: CT_Underline | None = ZeroOrOne("w:u", successors=_tag_seq[27:])
    vertAlign: CT_VerticalAlignRun | None = ZeroOrOne("w:vertAlign", successors=_tag_seq[32:])
    rtl = ZeroOrOne("w:rtl", successors=_tag_seq[33:])
    cs = ZeroOrOne("w:cs", successors=_tag_seq[34:])
    specVanish = ZeroOrOne("w:specVanish", successors=_tag_seq[38:])
    oMath = ZeroOrOne("w:oMath", successors=_tag_seq[39:])
    del _tag_seq

    def _new_color(self):
        """Override metaclass method to set `w:color/@val` to RGB black on create."""
        return parse_xml('<w:color %s w:val="000000"/>' % nsdecls("w"))

    @property
    def highlight_val(self) -> WD_COLOR_INDEX | None:
        """Value of `./w:highlight/@val`.

        Specifies font's highlight color, or `None` if the text is not highlighted.
        """
        highlight = self.highlight
        if highlight is None:
            return None
        return highlight.val

    @highlight_val.setter
    def highlight_val(self, value: WD_COLOR_INDEX | None) -> None:
        if value is None:
            self._remove_highlight()
            return
        highlight = self.get_or_add_highlight()
        highlight.val = value

    @property
    def rFonts_ascii(self) -> str | None:
        """The value of `w:rFonts/@w:ascii` or |None| if not present.

        Represents the assigned typeface name. The rFonts element also specifies other
        special-case typeface names; this method handles the case where just the common
        name is required.
        """
        rFonts = self.rFonts
        if rFonts is None:
            return None
        return rFonts.ascii

    @rFonts_ascii.setter
    def rFonts_ascii(self, value: str | None) -> None:
        # -- assigning None removes the whole `w:rFonts` element, not only the attr --
        if value is None:
            self._remove_rFonts()
            return
        rFonts = self.get_or_add_rFonts()
        rFonts.ascii = value

    @property
    def rFonts_hAnsi(self) -> str | None:
        """The value of `w:rFonts/@w:hAnsi` or |None| if not present."""
        rFonts = self.rFonts
        if rFonts is None:
            return None
        return rFonts.hAnsi

    @rFonts_hAnsi.setter
    def rFonts_hAnsi(self, value: str | None):
        # -- unlike `rFonts_ascii`, None does not remove an existing `w:rFonts`; it is
        # -- only a no-op when there is no `w:rFonts` element to begin with
        if value is None and self.rFonts is None:
            return
        rFonts = self.get_or_add_rFonts()
        rFonts.hAnsi = value

    @property
    def style(self) -> str | None:
        """String in `./w:rStyle/@val`, or None if `w:rStyle` is not present."""
        rStyle = self.rStyle
        if rStyle is None:
            return None
        return rStyle.val

    @style.setter
    def style(self, style: str | None) -> None:
        """Set `./w:rStyle/@val` to `style`, adding the `w:rStyle` element if necessary.

        If `style` is |None|, remove `w:rStyle` element if present.
        """
        if style is None:
            self._remove_rStyle()
        elif self.rStyle is None:
            self._add_rStyle(val=style)
        else:
            self.rStyle.val = style

    @property
    def subscript(self) -> bool | None:
        """|True| if `./w:vertAlign/@w:val` is "subscript".

        |False| if `w:vertAlign/@w:val` contains any other value. |None| if
        `w:vertAlign` is not present.
        """
        vertAlign = self.vertAlign
        if vertAlign is None:
            return None
        return vertAlign.val == ST_VerticalAlignRun.SUBSCRIPT

    @subscript.setter
    def subscript(self, value: bool | None) -> None:
        if value is None:
            self._remove_vertAlign()
        elif bool(value) is True:
            self.get_or_add_vertAlign().val = ST_VerticalAlignRun.SUBSCRIPT
        # -- value is falsy here: only clear `w:vertAlign` when it currently says
        # -- "subscript", so a superscript setting is left untouched
        elif self.vertAlign is not None and self.vertAlign.val == ST_VerticalAlignRun.SUBSCRIPT:
            self._remove_vertAlign()

    @property
    def superscript(self) -> bool | None:
        """|True| if `w:vertAlign/@w:val` is 'superscript'.

        |False| if `w:vertAlign/@w:val` contains any other value. |None| if
        `w:vertAlign` is not present.
        """
        vertAlign = self.vertAlign
        if vertAlign is None:
            return None
        return vertAlign.val == ST_VerticalAlignRun.SUPERSCRIPT

    @superscript.setter
    def superscript(self, value: bool | None):
        if value is None:
            self._remove_vertAlign()
        elif bool(value) is True:
            self.get_or_add_vertAlign().val = ST_VerticalAlignRun.SUPERSCRIPT
        # -- value is falsy here: only clear `w:vertAlign` when it currently says
        # -- "superscript", so a subscript setting is left untouched
        elif self.vertAlign is not None and self.vertAlign.val == ST_VerticalAlignRun.SUPERSCRIPT:
            self._remove_vertAlign()

    @property
    def sz_val(self) -> Length | None:
        """The value of `w:sz/@w:val` or |None| if not present."""
        sz = self.sz
        if sz is None:
            return None
        return sz.val

    @sz_val.setter
    def sz_val(self, value: Length | None):
        if value is None:
            self._remove_sz()
            return
        sz = self.get_or_add_sz()
        sz.val = value

    @property
    def u_val(self) -> WD_UNDERLINE | None:
        """Value of `w:u/@val`, or None if the `w:u` element is not present.

        The value is returned as read from the attribute; no mapping to `True` or
        `False` is performed by this accessor.
        """
        u = self.u
        if u is None:
            return None
        return u.val

    @u_val.setter
    def u_val(self, value: WD_UNDERLINE | None):
        # -- any existing `w:u` is dropped; a new one is added only for a non-None value
        self._remove_u()
        if value is not None:
            self._add_u().val = value

    def _get_bool_val(self, name: str) -> bool | None:
        """Value of boolean child looked up by attribute `name`, |None| if not present.

        `name` is the attribute name on this class, e.g. "b", "i", or "smallCaps" (not
        the namespace-prefixed tag name), because it is resolved with `getattr()`.
        """
        element = getattr(self, name)
        if element is None:
            return None
        return element.val

    def _set_bool_val(self, name: str, value: bool | None):
        """Set boolean child identified by attribute `name`, e.g. "b", to `value`.

        |None| removes the child element; any other value is assigned to its `val`,
        adding the child first when it is not present.
        """
        if value is None:
            getattr(self, "_remove_%s" % name)()
            return
        element = getattr(self, "get_or_add_%s" % name)()
        element.val = value
class CT_Underline(BaseOxmlElement):
    """`<w:u>` element, giving the underlining style for a run."""

    # -- `w:val` is optional on this element --
    val: WD_UNDERLINE | None = OptionalAttribute("w:val", WD_UNDERLINE)
class CT_VerticalAlignRun(BaseOxmlElement):
    """`<w:vertAlign>` element, indicating subscript or superscript."""

    # -- `w:val` is a required attribute on this element --
    val: str = RequiredAttribute("w:val", ST_VerticalAlignRun)

View File

@@ -0,0 +1,45 @@
"""Custom element classes related to hyperlinks (CT_Hyperlink)."""
from __future__ import annotations
from typing import TYPE_CHECKING, List
from docx.oxml.simpletypes import ST_OnOff, ST_String, XsdString
from docx.oxml.text.run import CT_R
from docx.oxml.xmlchemy import (
BaseOxmlElement,
OptionalAttribute,
ZeroOrMore,
)
if TYPE_CHECKING:
from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
class CT_Hyperlink(BaseOxmlElement):
    """`<w:hyperlink>` element, holding the text and address of a hyperlink."""

    r_lst: List[CT_R]

    rId: str | None = OptionalAttribute("r:id", XsdString)  # pyright: ignore[reportAssignmentType]
    anchor: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:anchor", ST_String
    )
    # -- `w:history` is optional and declared with a default of True --
    history: bool = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:history", ST_OnOff, default=True
    )

    r = ZeroOrMore("w:r")

    @property
    def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
        """Every `w:lastRenderedPageBreak` found in a `w:r` child of this hyperlink."""
        lrpbs = self.xpath("./w:r/w:lastRenderedPageBreak")
        return lrpbs

    @property
    def text(self) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """The textual content of this hyperlink.

        The hyperlink text is stored in one or more `w:r` children; the text of those
        runs is concatenated in document order.
        """
        run_texts = [r.text for r in self.xpath("w:r")]
        return "".join(run_texts)

View File

@@ -0,0 +1,278 @@
"""Custom element class for rendered page-break (CT_LastRenderedPageBreak)."""
from __future__ import annotations
import copy
from typing import TYPE_CHECKING
from docx.oxml.xmlchemy import BaseOxmlElement
from docx.shared import lazyproperty
if TYPE_CHECKING:
from docx.oxml.text.hyperlink import CT_Hyperlink
from docx.oxml.text.paragraph import CT_P
class CT_LastRenderedPageBreak(BaseOxmlElement):
"""`<w:lastRenderedPageBreak>` element, indicating page break inserted by renderer.
A rendered page-break is one inserted by the renderer when it runs out of room on a
page. It is an empty element (no attrs or children) and is a child of CT_R, peer to
CT_Text.
NOTE: this complex-type name does not exist in the schema, where
`w:lastRenderedPageBreak` maps to `CT_Empty`. This name was added to give it
distinguished behavior. CT_Empty is used for many elements.
"""
@property
def following_fragment_p(self) -> CT_P:
    """A "loose" `CT_P` containing only the paragraph content after this break.

    Raises `ValueError` if this `w:lastRenderedPageBreak` is not the first rendered
    page-break in its paragraph.

    The returned `CT_P` is a "clone" (deepcopy) of the `w:p` ancestor of this
    page-break with this `w:lastRenderedPageBreak` element and all content preceding
    it removed.

    NOTE: this `w:p` can itself contain one or more `w:lastRenderedPageBreak`
    elements (when the paragraph contained more than one). While this is rare, the
    caller should treat this paragraph the same as other paragraphs and split it if
    necessary in a following step or recursion.
    """
    if not self == self._first_lrpb_in_p(self._enclosing_p):
        raise ValueError("only defined on first rendered page-break in paragraph")

    # -- splitting approach is different when break is inside a hyperlink --
    return (
        self._following_frag_in_hlink if self._is_in_hyperlink else self._following_frag_in_run
    )
@property
def follows_all_content(self) -> bool:
    """True when this page-break element is the last "content" in the paragraph.

    This is a very uncommon case and may only occur in contrived cases or where the
    XML is edited by hand, but it is not precluded by the spec.
    """
    # -- a page-break inside a hyperlink never meets these criteria (for our
    # -- purposes at least) because it is considered "atomic" and always associated
    # -- with the page it starts on.
    if self._is_in_hyperlink:
        return False

    return bool(
        # -- a non-empty XPath result means such a page-break exists --
        self._enclosing_p.xpath(
            # -- in last run of paragraph --
            "(./w:r)[last()]"
            # -- all page-breaks --
            "/w:lastRenderedPageBreak"
            # -- that are not followed by any content-bearing elements --
            f"[not(following-sibling::*[{self._run_inner_content_xpath}])]"
        )
    )
@property
def precedes_all_content(self) -> bool:
    """True when a `w:lastRenderedPageBreak` comes before all paragraph content.

    This is a common case; it occurs whenever the page breaks on an even paragraph
    boundary.
    """
    # -- a page-break inside a hyperlink never meets these criteria because there
    # -- is always part of the hyperlink text before the page-break.
    if self._is_in_hyperlink:
        return False

    # -- page-breaks in the first run of the paragraph that have no content-bearing
    # -- element before them in that run
    leading_lrpb_xpath = (
        "./w:r[1]"
        "/w:lastRenderedPageBreak"
        f"[not(preceding-sibling::*[{self._run_inner_content_xpath}])]"
    )
    leading_lrpbs = self._enclosing_p.xpath(leading_lrpb_xpath)
    return bool(leading_lrpbs)
@property
def preceding_fragment_p(self) -> CT_P:
    """A "loose" `CT_P` containing only the paragraph content before this break.

    Raises `ValueError` if this `w:lastRenderedPageBreak` is not the first rendered
    page-break in its paragraph.

    The returned `CT_P` is a "clone" (deepcopy) of the `w:p` ancestor of this
    page-break with this `w:lastRenderedPageBreak` element and all its following
    siblings removed.
    """
    first_lrpb = self._first_lrpb_in_p(self._enclosing_p)
    if not self == first_lrpb:
        raise ValueError("only defined on first rendered page-break in paragraph")

    # -- a break inside a hyperlink is split differently than one in a plain run --
    if self._is_in_hyperlink:
        return self._preceding_frag_in_hlink
    return self._preceding_frag_in_run
def _enclosing_hyperlink(self, lrpb: CT_LastRenderedPageBreak) -> CT_Hyperlink:
    """The `w:hyperlink` grandparent of the `lrpb` page-break element.

    Raises `IndexError` when `lrpb` is not inside a `w:r` that is itself a child of
    a `w:hyperlink`, so only call this when the page-break is known to be in a
    hyperlink.
    """
    enclosing_hyperlinks = lrpb.xpath("./parent::w:r/parent::w:hyperlink")
    return enclosing_hyperlinks[0]
@property
def _enclosing_p(self) -> CT_P:
    """The nearest `w:p` ancestor of this `w:lastRenderedPageBreak`."""
    p_ancestors = self.xpath("./ancestor::w:p[1]")
    return p_ancestors[0]
def _first_lrpb_in_p(self, p: CT_P) -> CT_LastRenderedPageBreak:
    """The first `w:lastRenderedPageBreak` element in `p`.

    Both page-breaks in runs directly under `p` and those in runs inside a
    `w:hyperlink` child are considered. Raises `ValueError` if there are no
    rendered page-breaks in `p`.
    """
    lrpbs = p.xpath("./w:r/w:lastRenderedPageBreak | ./w:hyperlink/w:r/w:lastRenderedPageBreak")
    if lrpbs:
        return lrpbs[0]
    raise ValueError("no rendered page-breaks in paragraph element")
@lazyproperty
def _following_frag_in_hlink(self) -> CT_P:
    """Following CT_P fragment when the break occurs within a hyperlink.

    Note this is a *partial-function* and raises `ValueError` when this page-break
    is not inside a hyperlink.
    """
    if not self._is_in_hyperlink:
        raise ValueError("only defined on a rendered page-break in a hyperlink")

    # -- all mutations happen on a deep copy, the document's `w:p` is untouched --
    p_clone = copy.deepcopy(self._enclosing_p)

    # -- the counterpart of this page-break inside the clone (not self) --
    cloned_lrpb = self._first_lrpb_in_p(p_clone)

    # -- the `w:hyperlink` in the clone that contains that page-break --
    hlink = cloned_lrpb._enclosing_hyperlink(cloned_lrpb)

    # -- drop everything before the hyperlink, except the paragraph properties --
    preceding_content = hlink.xpath("./preceding-sibling::*[not(self::w:pPr)]")
    for element in preceding_content:
        p_clone.remove(element)

    # -- the hyperlink itself goes too, it belongs to the preceding-fragment-p --
    hlink.getparent().remove(hlink)

    # -- what is left of the clone is the following fragment --
    return p_clone
@lazyproperty
def _following_frag_in_run(self) -> CT_P:
    """`w:p` fragment that follows the break, for a break not in a hyperlink.

    This is a *partial function*: it raises `ValueError` when this page-break is
    inside a hyperlink.
    """
    if self._is_in_hyperlink:
        raise ValueError("only defined on a rendered page-break not in a hyperlink")

    # -- all mutations are applied to a deep copy so the source `w:p` is untouched --
    p_clone = copy.deepcopy(self._enclosing_p)

    # -- the first rendered page-break of the clone stands in for this one (not self) --
    cloned_lrpb = self._first_lrpb_in_p(p_clone)

    # -- the `w:r` the cloned page-break lives in --
    run = cloned_lrpb.xpath("./parent::w:r")[0]

    # -- drop the paragraph content ahead of that run, leaving `w:pPr` alone --
    for sibling in run.xpath("./preceding-sibling::*[not(self::w:pPr)]"):
        p_clone.remove(sibling)

    # -- inside the run, drop everything ahead of the break except `w:rPr`, then
    # -- drop the break itself
    for sibling in cloned_lrpb.xpath("./preceding-sibling::*[not(self::w:rPr)]"):
        run.remove(sibling)
    run.remove(cloned_lrpb)

    return p_clone
@lazyproperty
def _is_in_hyperlink(self) -> bool:
    """True when the run containing this page-break is a child of `w:hyperlink`."""
    hyperlinks = self.xpath("./parent::w:r/parent::w:hyperlink")
    return bool(hyperlinks)
@lazyproperty
def _preceding_frag_in_hlink(self) -> CT_P:
    """`w:p` fragment that precedes the break, for a break located in a hyperlink.

    This is a *partial function*: it raises `ValueError` when this page-break is
    not inside a hyperlink.
    """
    if not self._is_in_hyperlink:
        raise ValueError("only defined on a rendered page-break in a hyperlink")

    # -- all mutations are applied to a deep copy so the source `w:p` is untouched --
    p_clone = copy.deepcopy(self._enclosing_p)

    # -- the first rendered page-break of the clone stands in for this one (not self) --
    cloned_lrpb = self._first_lrpb_in_p(p_clone)

    # -- the `w:hyperlink` the cloned page-break lives in --
    hlink = cloned_lrpb._enclosing_hyperlink(cloned_lrpb)

    # -- drop the paragraph content that comes after the hyperlink --
    for sibling in hlink.xpath("./following-sibling::*"):
        p_clone.remove(sibling)

    # -- take the page-break out of the hyperlink run --
    cloned_lrpb.getparent().remove(cloned_lrpb)

    # -- the hyperlink stays whole in this fragment; it is never "split" --
    return p_clone
@lazyproperty
def _preceding_frag_in_run(self) -> CT_P:
    """`w:p` fragment that precedes the break, for a break not in a hyperlink.

    This is a *partial function*: it raises `ValueError` when this page-break is
    inside a hyperlink.
    """
    if self._is_in_hyperlink:
        raise ValueError("only defined on a rendered page-break not in a hyperlink")

    # -- all mutations are applied to a deep copy so the source `w:p` is untouched --
    p_clone = copy.deepcopy(self._enclosing_p)

    # -- the first rendered page-break of the clone stands in for this one (not self) --
    cloned_lrpb = self._first_lrpb_in_p(p_clone)

    # -- the `w:r` the cloned page-break lives in --
    run = cloned_lrpb.xpath("./parent::w:r")[0]

    # -- drop the paragraph content that comes after that run --
    for sibling in run.xpath("./following-sibling::*"):
        p_clone.remove(sibling)

    # -- inside the run, drop everything after the break, then the break itself --
    for sibling in cloned_lrpb.xpath("./following-sibling::*"):
        run.remove(sibling)
    run.remove(cloned_lrpb)

    return p_clone
@lazyproperty
def _run_inner_content_xpath(self) -> str:
    """XPath union expression that matches any run inner-content element.

    The expression tests the context node itself (`self::` axis) against each of
    the run inner-content tags listed below.
    """
    alternatives = (
        "self::w:br",
        " | self::w:cr",
        " | self::w:drawing",
        " | self::w:noBreakHyphen",
        " | self::w:ptab",
        " | self::w:t",
        " | self::w:tab",
    )
    return "".join(alternatives)

View File

@@ -0,0 +1,106 @@
# pyright: reportPrivateUsage=false
"""Custom element classes related to paragraphs (CT_P)."""
from __future__ import annotations
from typing import TYPE_CHECKING, Callable, List, cast
from docx.oxml.parser import OxmlElement
from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrMore, ZeroOrOne
if TYPE_CHECKING:
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.oxml.section import CT_SectPr
from docx.oxml.text.hyperlink import CT_Hyperlink
from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
from docx.oxml.text.parfmt import CT_PPr
from docx.oxml.text.run import CT_R
class CT_P(BaseOxmlElement):
    """`<w:p>` element, containing the properties and text for a paragraph."""

    # -- annotation-only declarations; no value is assigned to these names here --
    add_r: Callable[[], CT_R]
    get_or_add_pPr: Callable[[], CT_PPr]
    hyperlink_lst: List[CT_Hyperlink]
    r_lst: List[CT_R]

    # -- declarative child-element definitions --
    pPr: CT_PPr | None = ZeroOrOne("w:pPr")  # pyright: ignore[reportAssignmentType]
    hyperlink = ZeroOrMore("w:hyperlink")
    r = ZeroOrMore("w:r")

    def add_p_before(self) -> CT_P:
        """Insert a new, empty `<w:p>` as the preceding sibling of this one and return it."""
        p = cast(CT_P, OxmlElement("w:p"))
        self.addprevious(p)
        return p

    @property
    def alignment(self) -> WD_PARAGRAPH_ALIGNMENT | None:
        """Value of the `<w:jc>` grandchild, or |None| when there is no `w:pPr` child.

        The value is read from `pPr.jc_val`.
        """
        pPr = self.pPr
        return None if pPr is None else pPr.jc_val

    @alignment.setter
    def alignment(self, value: WD_PARAGRAPH_ALIGNMENT):
        # -- `w:pPr` is created on demand so the value always has a home --
        self.get_or_add_pPr().jc_val = value

    def clear_content(self):
        """Remove every child element other than `<w:pPr>`."""
        removable = self.xpath("./*[not(self::w:pPr)]")
        for element in removable:
            self.remove(element)

    @property
    def inner_content_elements(self) -> List[CT_R | CT_Hyperlink]:
        """The `w:r` and `w:hyperlink` children of this `w:p`, in document order."""
        return self.xpath("./w:r | ./w:hyperlink")

    @property
    def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
        """All `w:lastRenderedPageBreak` elements in this paragraph.

        A rendered page-break usually sits in a run directly under the paragraph but
        may also sit in a run inside a hyperlink; both locations are searched.
        """
        return self.xpath(
            "./w:r/w:lastRenderedPageBreak | ./w:hyperlink/w:r/w:lastRenderedPageBreak"
        )

    def set_sectPr(self, sectPr: CT_SectPr):
        """Make `sectPr` the section-properties grandchild, replacing any existing one.

        `w:pPr` is added when not already present; insertion goes through
        `pPr._insert_sectPr()`.
        """
        pPr = self.get_or_add_pPr()
        # -- remove first so at most one `w:sectPr` is present after the insert --
        pPr._remove_sectPr()
        pPr._insert_sectPr(sectPr)

    @property
    def style(self) -> str | None:
        """Paragraph style string read from `pPr.style`.

        |None| when this paragraph has no `w:pPr` child.
        """
        pPr = self.pPr
        return None if pPr is None else pPr.style

    @style.setter
    def style(self, style: str | None):
        # -- `w:pPr` is created on demand, including when `style` is None --
        self.get_or_add_pPr().style = style

    @property
    def text(self):  # pyright: ignore[reportIncompatibleMethodOverride]
        """The textual content of this paragraph.

        The `.text` of each `w:r` and `w:hyperlink` child is concatenated in document
        order.
        """
        inner_content = self.xpath("w:r | w:hyperlink")
        return "".join(element.text for element in inner_content)

    def _insert_pPr(self, pPr: CT_PPr) -> CT_PPr:
        """Insert `pPr` as the first child of this paragraph and return it."""
        self.insert(0, pPr)
        return pPr

View File

@@ -0,0 +1,392 @@
"""Custom element classes related to paragraph properties (CT_PPr)."""
from __future__ import annotations
from typing import TYPE_CHECKING, Callable
from docx.enum.text import (
WD_ALIGN_PARAGRAPH,
WD_LINE_SPACING,
WD_TAB_ALIGNMENT,
WD_TAB_LEADER,
)
from docx.oxml.shared import CT_DecimalNumber
from docx.oxml.simpletypes import ST_SignedTwipsMeasure, ST_TwipsMeasure
from docx.oxml.xmlchemy import (
BaseOxmlElement,
OneOrMore,
OptionalAttribute,
RequiredAttribute,
ZeroOrOne,
)
from docx.shared import Length
if TYPE_CHECKING:
from docx.oxml.section import CT_SectPr
from docx.oxml.shared import CT_String
class CT_Ind(BaseOxmlElement):
    """``<w:ind>`` element, specifying paragraph indentation.

    All four attributes are optional. `w:left` and `w:right` use the signed
    twips-measure type; `w:firstLine` and `w:hanging` use the unsigned one.
    """

    # -- signed measures --
    left: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:left", ST_SignedTwipsMeasure
    )
    right: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:right", ST_SignedTwipsMeasure
    )

    # -- unsigned measures --
    firstLine: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:firstLine", ST_TwipsMeasure
    )
    hanging: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:hanging", ST_TwipsMeasure
    )
class CT_Jc(BaseOxmlElement):
    """``<w:jc>`` element, specifying paragraph justification.

    The required `w:val` attribute is exposed as `.val`, a `WD_ALIGN_PARAGRAPH` member.
    """

    val: WD_ALIGN_PARAGRAPH = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
        "w:val", WD_ALIGN_PARAGRAPH
    )
class CT_PPr(BaseOxmlElement):
    """``<w:pPr>`` element, containing the properties for a paragraph."""

    # -- annotation-only declarations; no value is assigned to these names here --
    get_or_add_ind: Callable[[], CT_Ind]
    get_or_add_pStyle: Callable[[], CT_String]
    get_or_add_sectPr: Callable[[], CT_SectPr]
    _insert_sectPr: Callable[[CT_SectPr], None]
    _remove_pStyle: Callable[[], None]
    _remove_sectPr: Callable[[], None]

    # -- child tags in sequence; each declaration below passes as `successors` the
    # -- slice of this tuple that starts just after its own tag
    _tag_seq = (
        "w:pStyle",
        "w:keepNext",
        "w:keepLines",
        "w:pageBreakBefore",
        "w:framePr",
        "w:widowControl",
        "w:numPr",
        "w:suppressLineNumbers",
        "w:pBdr",
        "w:shd",
        "w:tabs",
        "w:suppressAutoHyphens",
        "w:kinsoku",
        "w:wordWrap",
        "w:overflowPunct",
        "w:topLinePunct",
        "w:autoSpaceDE",
        "w:autoSpaceDN",
        "w:bidi",
        "w:adjustRightInd",
        "w:snapToGrid",
        "w:spacing",
        "w:ind",
        "w:contextualSpacing",
        "w:mirrorIndents",
        "w:suppressOverlap",
        "w:jc",
        "w:textDirection",
        "w:textAlignment",
        "w:textboxTightWrap",
        "w:outlineLvl",
        "w:divId",
        "w:cnfStyle",
        "w:rPr",
        "w:sectPr",
        "w:pPrChange",
    )
    pStyle: CT_String | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:pStyle", successors=_tag_seq[1:]
    )
    keepNext = ZeroOrOne("w:keepNext", successors=_tag_seq[2:])
    keepLines = ZeroOrOne("w:keepLines", successors=_tag_seq[3:])
    pageBreakBefore = ZeroOrOne("w:pageBreakBefore", successors=_tag_seq[4:])
    widowControl = ZeroOrOne("w:widowControl", successors=_tag_seq[6:])
    numPr = ZeroOrOne("w:numPr", successors=_tag_seq[7:])
    tabs = ZeroOrOne("w:tabs", successors=_tag_seq[11:])
    spacing = ZeroOrOne("w:spacing", successors=_tag_seq[22:])
    ind: CT_Ind | None = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:ind", successors=_tag_seq[23:]
    )
    jc = ZeroOrOne("w:jc", successors=_tag_seq[27:])
    outlineLvl: CT_DecimalNumber = ZeroOrOne(  # pyright: ignore[reportAssignmentType]
        "w:outlineLvl", successors=_tag_seq[31:]
    )
    sectPr = ZeroOrOne("w:sectPr", successors=_tag_seq[35:])
    # -- the tag sequence is only needed while the class body executes --
    del _tag_seq

    @property
    def first_line_indent(self) -> Length | None:
        """|Length| derived from `w:ind/@w:hanging` and `w:ind/@w:firstLine`.

        A hanging indent takes precedence and is reported as a negative length.
        Otherwise the first-line value is returned as-is. |None| when there is no
        `w:ind` child or when neither attribute is present.
        """
        ind = self.ind
        if ind is None:
            return None
        hanging = ind.hanging
        if hanging is not None:
            return Length(-hanging)
        return ind.firstLine

    @first_line_indent.setter
    def first_line_indent(self, value: Length | None):
        # -- nothing to clear and nothing to set; avoid adding an empty `w:ind` --
        if self.ind is None and value is None:
            return
        ind = self.get_or_add_ind()
        # -- start from a clean slate, the two attributes are mutually exclusive --
        ind.firstLine = None
        ind.hanging = None
        if value is None:
            return
        if value < 0:
            # -- a negative first-line indent is stored as a positive hanging indent --
            ind.hanging = -value
        else:
            ind.firstLine = value

    @property
    def ind_left(self) -> Length | None:
        """Value of `w:ind/@w:left`, |None| when `w:ind` or the attribute is absent."""
        ind = self.ind
        return None if ind is None else ind.left

    @ind_left.setter
    def ind_left(self, value: Length | None):
        # -- assigning None when there is no `w:ind` would only add an empty element --
        if value is not None or self.ind is not None:
            self.get_or_add_ind().left = value

    @property
    def ind_right(self) -> Length | None:
        """Value of `w:ind/@w:right`, |None| when `w:ind` or the attribute is absent."""
        ind = self.ind
        return None if ind is None else ind.right

    @ind_right.setter
    def ind_right(self, value: Length | None):
        # -- assigning None when there is no `w:ind` would only add an empty element --
        if value is not None or self.ind is not None:
            self.get_or_add_ind().right = value

    @property
    def jc_val(self) -> WD_ALIGN_PARAGRAPH | None:
        """Value of the `<w:jc>` child element, |None| when that child is absent."""
        jc = self.jc
        if jc is None:
            return None
        return jc.val

    @jc_val.setter
    def jc_val(self, value):
        # -- None means "no explicit justification": the `w:jc` child is removed --
        if value is not None:
            self.get_or_add_jc().val = value
            return
        self._remove_jc()

    @property
    def keepLines_val(self):
        """Value of `keepLines/@val`, |None| when the `w:keepLines` child is absent."""
        keepLines = self.keepLines
        return None if keepLines is None else keepLines.val

    @keepLines_val.setter
    def keepLines_val(self, value):
        # -- None removes the child element rather than writing a value --
        if value is None:
            self._remove_keepLines()
            return
        self.get_or_add_keepLines().val = value

    @property
    def keepNext_val(self):
        """Value of `keepNext/@val`, |None| when the `w:keepNext` child is absent."""
        keepNext = self.keepNext
        return None if keepNext is None else keepNext.val

    @keepNext_val.setter
    def keepNext_val(self, value):
        # -- None removes the child element rather than writing a value --
        if value is None:
            self._remove_keepNext()
            return
        self.get_or_add_keepNext().val = value

    @property
    def pageBreakBefore_val(self):
        """Value of `pageBreakBefore/@val`, |None| when that child is absent."""
        pageBreakBefore = self.pageBreakBefore
        return None if pageBreakBefore is None else pageBreakBefore.val

    @pageBreakBefore_val.setter
    def pageBreakBefore_val(self, value):
        # -- None removes the child element rather than writing a value --
        if value is None:
            self._remove_pageBreakBefore()
            return
        self.get_or_add_pageBreakBefore().val = value

    @property
    def spacing_after(self):
        """Value of `w:spacing/@w:after`, |None| when `w:spacing` is absent."""
        spacing = self.spacing
        return None if spacing is None else spacing.after

    @spacing_after.setter
    def spacing_after(self, value):
        # -- assigning None with no `w:spacing` present is a no-op --
        if value is not None or self.spacing is not None:
            self.get_or_add_spacing().after = value

    @property
    def spacing_before(self):
        """Value of `w:spacing/@w:before`, |None| when `w:spacing` is absent."""
        spacing = self.spacing
        return None if spacing is None else spacing.before

    @spacing_before.setter
    def spacing_before(self, value):
        # -- assigning None with no `w:spacing` present is a no-op --
        if value is not None or self.spacing is not None:
            self.get_or_add_spacing().before = value

    @property
    def spacing_line(self):
        """Value of `w:spacing/@w:line`, |None| when `w:spacing` is absent."""
        spacing = self.spacing
        return None if spacing is None else spacing.line

    @spacing_line.setter
    def spacing_line(self, value):
        # -- assigning None with no `w:spacing` present is a no-op --
        if value is not None or self.spacing is not None:
            self.get_or_add_spacing().line = value

    @property
    def spacing_lineRule(self):
        """Value of `w:spacing/@w:lineRule` as a :ref:`WdLineSpacing` member.

        Only the `MULTIPLE`, `EXACTLY`, and `AT_LEAST` members are used. A client
        wanting `SINGLE`, `DOUBLE`, or `MULTIPLE` distinguished from each other must
        work that out from the value of `w:spacing/@w:line`.

        When the attribute is absent but `w:spacing/@w:line` is present, `MULTIPLE`
        is returned. |None| when `w:spacing` is absent or when both attributes are.
        """
        spacing = self.spacing
        if spacing is None:
            return None
        lineRule = spacing.lineRule
        if lineRule is not None:
            return lineRule
        # -- no explicit rule; a line value on its own is reported as MULTIPLE --
        return WD_LINE_SPACING.MULTIPLE if spacing.line is not None else None

    @spacing_lineRule.setter
    def spacing_lineRule(self, value):
        # -- assigning None with no `w:spacing` present is a no-op --
        if value is not None or self.spacing is not None:
            self.get_or_add_spacing().lineRule = value

    @property
    def style(self) -> str | None:
        """String in `./w:pStyle/@val`, |None| when the `w:pStyle` child is absent."""
        pStyle = self.pStyle
        return None if pStyle is None else pStyle.val

    @style.setter
    def style(self, style: str | None):
        """Set `./w:pStyle/@val` to `style`, adding the element when needed.

        Assigning |None| removes the `w:pStyle` child via `_remove_pStyle()`.
        """
        if style is not None:
            self.get_or_add_pStyle().val = style
            return
        self._remove_pStyle()

    @property
    def widowControl_val(self):
        """Value of `widowControl/@val`, |None| when that child is absent."""
        widowControl = self.widowControl
        return None if widowControl is None else widowControl.val

    @widowControl_val.setter
    def widowControl_val(self, value):
        # -- None removes the child element rather than writing a value --
        if value is None:
            self._remove_widowControl()
            return
        self.get_or_add_widowControl().val = value
class CT_Spacing(BaseOxmlElement):
    """``<w:spacing>`` element, specifying paragraph spacing attributes.

    Covers space before and after the paragraph as well as line spacing. All four
    attributes are optional.
    """

    # -- space around the paragraph --
    after = OptionalAttribute("w:after", ST_TwipsMeasure)
    before = OptionalAttribute("w:before", ST_TwipsMeasure)

    # -- line spacing and the rule it is interpreted by --
    line = OptionalAttribute("w:line", ST_SignedTwipsMeasure)
    lineRule = OptionalAttribute("w:lineRule", WD_LINE_SPACING)
class CT_TabStop(BaseOxmlElement):
    """`<w:tab>` element, representing an individual tab stop.

    The `w:tab` tag is also used for a tab character in a run, so this class doubles
    for that role; in a run only the `__str__()` method is needed.
    """

    # -- `w:val` and `w:pos` are required; `w:leader` falls back to SPACES --
    val: WD_TAB_ALIGNMENT = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
        "w:val", WD_TAB_ALIGNMENT
    )
    leader: WD_TAB_LEADER | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:leader", WD_TAB_LEADER, default=WD_TAB_LEADER.SPACES
    )
    pos: Length = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
        "w:pos", ST_SignedTwipsMeasure
    )

    def __str__(self) -> str:
        """Text equivalent of a `w:tab` element in a run, a single tab character.

        Lets the text of run inner-content be read the same way for every kind of
        text inner-content element.
        """
        tab_char = "\t"
        return tab_char
class CT_TabStops(BaseOxmlElement):
    """``<w:tabs>`` element, container for a sorted sequence of tab stops."""

    tab = OneOrMore("w:tab", successors=())

    def insert_tab_in_order(self, pos, align, leader):
        """Create a `w:tab` child, place it in `pos` order, and return it.

        The new element goes directly before the first existing tab stop whose
        position is greater than `pos`, or at the end when there is none.
        """
        new_tab = self._new_tab()
        new_tab.pos = pos
        new_tab.val = align
        new_tab.leader = leader

        # -- first existing tab stop positioned beyond the new one, if any --
        successor = next((tab for tab in self.tab_lst if new_tab.pos < tab.pos), None)
        if successor is None:
            self.append(new_tab)
        else:
            successor.addprevious(new_tab)
        return new_tab

View File

@@ -0,0 +1,307 @@
"""Custom element classes related to text runs (CT_R)."""
from __future__ import annotations
from typing import TYPE_CHECKING, Callable, Iterator, List, cast
from docx.oxml.drawing import CT_Drawing
from docx.oxml.ns import qn
from docx.oxml.parser import OxmlElement
from docx.oxml.simpletypes import ST_BrClear, ST_BrType
from docx.oxml.text.font import CT_RPr
from docx.oxml.xmlchemy import BaseOxmlElement, OptionalAttribute, ZeroOrMore, ZeroOrOne
from docx.shared import TextAccumulator
if TYPE_CHECKING:
from docx.oxml.shape import CT_Anchor, CT_Inline
from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
from docx.oxml.text.parfmt import CT_TabStop
# ------------------------------------------------------------------------------------
# Run-level elements
class CT_R(BaseOxmlElement):
    """`<w:r>` element, containing the properties and text for a run."""

    # -- annotation-only declarations; no value is assigned to these names here --
    add_br: Callable[[], CT_Br]
    add_tab: Callable[[], CT_TabStop]
    get_or_add_rPr: Callable[[], CT_RPr]
    _add_drawing: Callable[[], CT_Drawing]
    _add_t: Callable[..., CT_Text]

    # -- declarative child-element definitions --
    rPr: CT_RPr | None = ZeroOrOne("w:rPr")  # pyright: ignore[reportAssignmentType]
    br = ZeroOrMore("w:br")
    cr = ZeroOrMore("w:cr")
    drawing = ZeroOrMore("w:drawing")
    t = ZeroOrMore("w:t")
    tab = ZeroOrMore("w:tab")

    def add_t(self, text: str) -> CT_Text:
        """Add a `<w:t>` element containing `text` and return it.

        When `text` has leading or trailing whitespace, `xml:space="preserve"` is set
        on the new element.
        """
        t = self._add_t(text=text)
        if text != text.strip():
            t.set(qn("xml:space"), "preserve")
        return t

    def add_drawing(self, inline_or_anchor: CT_Inline | CT_Anchor) -> CT_Drawing:
        """Add a `w:drawing` child wrapping `inline_or_anchor` and return it.

        `inline_or_anchor` is appended as a child of the new `CT_Drawing` element.
        """
        drawing = self._add_drawing()
        drawing.append(inline_or_anchor)
        return drawing

    def clear_content(self) -> None:
        """Remove every child element other than `w:rPr`."""
        removable = self.xpath("./*[not(self::w:rPr)]")
        for element in removable:
            self.remove(element)

    @property
    def inner_content_items(self) -> List[str | CT_Drawing | CT_LastRenderedPageBreak]:
        """Text of run, possibly punctuated by drawing and rendered page-break elements.

        Text-like children are pushed to an accumulator as their `str()` value; each
        `w:drawing` or `w:lastRenderedPageBreak` child is emitted as the element
        itself, after whatever the accumulator holds at that point.
        """
        # -- imported here rather than at module level --
        from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak

        accum = TextAccumulator()
        items: List[str | CT_Drawing | CT_LastRenderedPageBreak] = []

        children = self.xpath(
            "w:br"
            " | w:cr"
            " | w:drawing"
            " | w:lastRenderedPageBreak"
            " | w:noBreakHyphen"
            " | w:ptab"
            " | w:t"
            " | w:tab"
        )
        for child in children:
            if not isinstance(child, (CT_Drawing, CT_LastRenderedPageBreak)):
                accum.push(str(child))
                continue
            # -- emit the text gathered so far, then the element itself --
            items.extend(accum.pop())
            items.append(child)

        # -- don't forget the "tail" string --
        items.extend(accum.pop())
        return items

    def insert_comment_range_end_and_reference_below(self, comment_id: int) -> None:
        """Insert `w:commentRangeEnd` and a `w:commentReference` run after this run.

        The `w:commentRangeEnd` element ends up as the immediate following sibling of
        this `w:r`, itself followed by a `w:r` holding the `w:commentReference`.
        """
        reference_run = self._new_comment_reference_run(comment_id)
        range_end = OxmlElement("w:commentRangeEnd", attrs={qn("w:id"): str(comment_id)})
        # -- each `addnext()` places its argument directly after this run, so the
        # -- element added last is the one that ends up closest
        self.addnext(reference_run)
        self.addnext(range_end)

    def insert_comment_range_start_above(self, comment_id: int) -> None:
        """Insert a `w:commentRangeStart` element with `comment_id` before this run."""
        range_start = OxmlElement("w:commentRangeStart", attrs={qn("w:id"): str(comment_id)})
        self.addprevious(range_start)

    @property
    def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
        """All `w:lastRenderedPageBreak` children of this run."""
        return self.xpath("./w:lastRenderedPageBreak")

    @property
    def style(self) -> str | None:
        """Character style string read from `rPr.style`.

        |None| when this run has no `w:rPr` child.
        """
        rPr = self.rPr
        return None if rPr is None else rPr.style

    @style.setter
    def style(self, style: str | None):
        """Set the character style of this `w:r` element to `style`.

        `w:rPr` is added when not present; the value, |None| included, is handed to
        `rPr.style`.
        """
        self.get_or_add_rPr().style = style

    @property
    def text(self) -> str:
        """The textual content of this run.

        Each text-like inner-content child, `w:tab` for example, contributes its
        `str()` value, in document order.
        """
        text_children = self.xpath("w:br | w:cr | w:noBreakHyphen | w:ptab | w:t | w:tab")
        return "".join(str(child) for child in text_children)

    @text.setter
    def text(self, text: str):  # pyright: ignore[reportIncompatibleMethodOverride]
        # -- existing content (other than `w:rPr`) is replaced, not appended to --
        self.clear_content()
        _RunContentAppender.append_to_run_from_text(self, text)

    def _insert_rPr(self, rPr: CT_RPr) -> CT_RPr:
        """Insert `rPr` as the first child of this run and return it."""
        self.insert(0, rPr)
        return rPr

    def _new_comment_reference_run(self, comment_id: int) -> CT_R:
        """Return a new `w:r` element with `w:commentReference` referencing `comment_id`.

        Should look like this:

            <w:r>
              <w:rPr><w:rStyle w:val="CommentReference"/></w:rPr>
              <w:commentReference w:id="0"/>
            </w:r>

        """
        run = cast(CT_R, OxmlElement("w:r"))
        run.get_or_add_rPr().style = "CommentReference"
        reference = OxmlElement("w:commentReference", attrs={qn("w:id"): str(comment_id)})
        run.append(reference)
        return run
# ------------------------------------------------------------------------------------
# Run inner-content elements
class CT_Br(BaseOxmlElement):
    """`<w:br>` element, indicating a line, page, or column break in a run."""

    # -- an absent `w:type` attribute reads as "textWrapping" --
    type: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:type", ST_BrType, default="textWrapping"
    )
    clear: str | None = OptionalAttribute("w:clear", ST_BrClear)  # pyright: ignore

    def __str__(self) -> str:
        """Text equivalent of this element, which depends on the break type.

        A line break (type "textWrapping") becomes a single newline. Any other break
        type, such as a column or page break, becomes the empty string.

        Lets the text of run inner-content be read the same way for every kind of
        run inner-content text element.
        """
        if self.type == "textWrapping":
            return "\n"
        return ""
class CT_Cr(BaseOxmlElement):
    """`<w:cr>` element, representing a carriage-return (0x0D) character within a run.

    In Word this is a "soft carriage-return": it does not end the paragraph the way
    pressing Enter (aka. Return) does. Its text equivalent here is a newline, the
    closest plain-text counterpart in Python.

    NOTE: this complex-type name does not exist in the schema, where `w:cr` maps to
    `CT_Empty`. The name was added to give the element distinguished behavior, since
    `CT_Empty` is used for many elements.
    """

    def __str__(self) -> str:
        """Text equivalent of this element, a single newline character."""
        newline = "\n"
        return newline
class CT_NoBreakHyphen(BaseOxmlElement):
    """`<w:noBreakHyphen>` element, a hyphen ineligible for a line-wrap position.

    Its plain-text equivalent is a dash ("-").

    NOTE: this complex-type name does not exist in the schema, where
    `w:noBreakHyphen` maps to `CT_Empty`. The name was added to give the element
    behavior distinguished from the many other elements the schema represents with
    `CT_Empty`.
    """

    def __str__(self) -> str:
        """Text equivalent of this element, a single dash character ("-")."""
        dash = "-"
        return dash
class CT_PTab(BaseOxmlElement):
    """`<w:ptab>` element, representing an absolute-position tab character within a run.

    The character moves the rendering position to the specified position without
    regard to any tab-stops, perhaps for laying out a table-of-contents (TOC) or
    similar.
    """

    def __str__(self) -> str:
        """Text equivalent of this element, a single tab character.

        Lets the text of run inner-content be read the same way for every kind of
        run inner-content text element.
        """
        tab_char = "\t"
        return tab_char
# -- CT_Tab functionality is provided by CT_TabStop which also uses `w:tab` tag. That
# -- element class provides the __str__() method for this empty element, unconditionally
# -- returning "\t".
class CT_Text(BaseOxmlElement):
    """`<w:t>` element, containing a sequence of characters within a run."""

    def __str__(self) -> str:
        """Text contained in this element, the empty string when it has no content.

        This lets the element be queried for its text the same way as the other
        run-content elements. In particular the result is never None, which is what
        `etree._Element.text` gives for an element with no content.
        """
        text = self.text
        return text if text else ""
# ------------------------------------------------------------------------------------
# Utility
class _RunContentAppender:
"""Translates a Python string into run content elements appended in a `w:r` element.
Contiguous sequences of regular characters are appended in a single `<w:t>` element.
Each tab character ('\t') causes a `<w:tab/>` element to be appended. Likewise a
newline or carriage return character ('\n', '\r') causes a `<w:cr>` element to be
appended.
"""
def __init__(self, r: CT_R):
self._r = r
self._bfr: List[str] = []
@classmethod
def append_to_run_from_text(cls, r: CT_R, text: str):
"""Append inner-content elements for `text` to `r` element."""
appender = cls(r)
appender.add_text(text)
def add_text(self, text: str):
"""Append inner-content elements for `text` to the `w:r` element."""
for char in text:
self.add_char(char)
self.flush()
def add_char(self, char: str):
"""Process next character of input through finite state maching (FSM).
There are two possible states, buffer pending and not pending, but those are
hidden behind the `.flush()` method which must be called at the end of text to
ensure any pending `<w:t>` element is written.
"""
if char == "\t":
self.flush()
self._r.add_tab()
elif char in "\r\n":
self.flush()
self._r.add_br()
else:
self._bfr.append(char)
def flush(self):
text = "".join(self._bfr)
if text:
self._r.add_t(text)
self._bfr.clear()

View File

@@ -0,0 +1,696 @@
# pyright: reportImportCycles=false
"""Enabling declarative definition of lxml custom element classes."""
from __future__ import annotations
import re
from typing import TYPE_CHECKING, Any, Callable, Sequence, Type, TypeVar
from lxml import etree
from lxml.etree import ElementBase, _Element # pyright: ignore[reportPrivateUsage]
from docx.oxml.exceptions import InvalidXmlError
from docx.oxml.ns import NamespacePrefixedTag, nsmap, qn
from docx.shared import lazyproperty
if TYPE_CHECKING:
from docx.enum.base import BaseXmlEnum
from docx.oxml.simpletypes import BaseSimpleType
def serialize_for_reading(element: ElementBase):
    """Return `element` as pretty-printed XML wrapped in an `XmlString`.

    Intended for human reading and for tests. The text is produced with
    `encoding="unicode"` and carries no XML declaration.
    """
    return XmlString(etree.tostring(element, encoding="unicode", pretty_print=True))
class XmlString(str):
    """Provides string comparison override suitable for serialized XML that is useful
    for tests.

    Two values are equal when they have the same number of lines and each pair of
    corresponding lines is equivalent. Lines holding an XML element are equivalent
    when they match on everything but the order of their attributes. Any line that
    cannot be parsed as an element is compared literally.
    """

    #     '    <w:xyz xmlns:a="http://ns/decl/a" attr_name="val">text</w:xyz>'
    #      |          |                                          ||           |
    #      +----------+------------------------------------------++-----------+
    #       front      attrs                                     | text
    #                                                          close

    _xml_elm_line_patt = re.compile(r"( *</?[\w:]+)(.*?)(/?>)([^<]*</[\w:]+>)?$")

    # -- what `._parse_line()` returns for a line that is not an XML element line; a
    # -- parsed line can never produce this because its `close` part is never empty
    _UNPARSED = ("", "", "", "")

    def __eq__(self, other: object) -> bool:
        """True when `other` is a `str` that is line-by-line XML-equivalent to this one."""
        if not isinstance(other, str):
            return False
        lines = self.splitlines()
        lines_other = other.splitlines()
        if len(lines) != len(lines_other):
            return False
        return all(
            self._eq_elm_strs(line, line_other) for line, line_other in zip(lines, lines_other)
        )

    def __ne__(self, other: object) -> bool:
        """Logical inverse of `.__eq__()`."""
        return not self.__eq__(other)

    def _attr_seq(self, attrs: str) -> list[str]:
        """Return the sorted whitespace-separated attribute strings found in `attrs`.

        Splitting on whitespace leaves no whitespace at either end of any item.
        """
        return sorted(attrs.split())

    def _eq_elm_strs(self, line: str, line_2: str) -> bool:
        """Return True if the element in `line_2` is XML equivalent to the element in
        `line`.

        Attribute order is not significant. When either line is not an XML element
        line (a continuation line of multi-line text for example) the two lines must
        be identical.
        """
        parsed = self._parse_line(line)
        parsed_2 = self._parse_line(line_2)

        # -- Unparseable lines all parse to the same empty result, so comparing the
        # -- parsed parts would make any two of them "equal". Compare the raw text.
        if self._UNPARSED in (parsed, parsed_2):
            return line == line_2

        front, attrs, close, text = parsed
        front_2, attrs_2, close_2, text_2 = parsed_2
        return (
            front == front_2
            and self._attr_seq(attrs) == self._attr_seq(attrs_2)
            and close == close_2
            and text == text_2
        )

    @classmethod
    def _parse_line(cls, line: str) -> tuple[str, str, str, str | None]:
        """(front, attrs, close, text) 4-tuple result of parsing XML element `line`.

        `text` is None when the line has no text-and-closing-tag part. Four empty
        strings are returned when `line` does not match the element-line pattern.
        """
        match = cls._xml_elm_line_patt.match(line)
        if match is None:
            return cls._UNPARSED
        front, attrs, close, text = match.groups()
        return front, attrs, close, text
# -- module-level generic type variable, for use in annotations --
_T = TypeVar("_T")
class MetaOxmlElement(type):
    """Metaclass for BaseOxmlElement.

    When a class is created, each class-body member that is one of the declarative
    attribute or child-element definitions is asked to populate the new class.
    """

    def __init__(cls, clsname: str, bases: tuple[type, ...], namespace: dict[str, Any]):
        declarative_types = (
            OneAndOnlyOne,
            OneOrMore,
            OptionalAttribute,
            RequiredAttribute,
            ZeroOrMore,
            ZeroOrOne,
            ZeroOrOneChoice,
        )
        # -- lazily pick out the declarations, keeping class-body order --
        declarations = (
            (name, member)
            for name, member in namespace.items()
            if isinstance(member, declarative_types)
        )
        for name, member in declarations:
            member.populate_class_members(cls, name)
class BaseAttribute:
    """Base class for OptionalAttribute and RequiredAttribute.

    Holds the attribute name and simple type, and installs the read/write property
    on the element class. The getter and setter themselves are left to subclasses.
    """

    def __init__(self, attr_name: str, simple_type: Type[BaseXmlEnum] | Type[BaseSimpleType]):
        super().__init__()
        self._attr_name = attr_name
        self._simple_type = simple_type

    def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None:
        """Add the appropriate methods to `element_cls`."""
        # -- remembered because `_add_attr_property()` reads both --
        self._element_cls = element_cls
        self._prop_name = prop_name

        self._add_attr_property()

    def _add_attr_property(self):
        """Add a read/write `.{prop_name}` property to the element class.

        On access the property returns the interpreted value of this attribute; on
        assignment it changes the attribute value to its ST_* counterpart.
        """
        attr_property = property(self._getter, self._setter, None)
        # -- assign unconditionally to overwrite element name definition --
        setattr(self._element_cls, self._prop_name, attr_property)

    @property
    def _clark_name(self):
        """Attribute name to use for element access.

        A name containing a colon is passed through `qn()`; any other name is
        returned unchanged.
        """
        attr_name = self._attr_name
        return qn(attr_name) if ":" in attr_name else attr_name

    @property
    def _getter(self) -> Callable[[BaseOxmlElement], Any | None]: ...

    @property
    def _setter(
        self,
    ) -> Callable[[BaseOxmlElement, Any | None], None]: ...
class OptionalAttribute(BaseAttribute):
    """Defines an optional attribute on a custom element class.

    Reading an optional attribute that is not present gives the default value.
    Assigning |None| removes the attribute; reading it afterward still gives the
    default value when one was specified.
    """

    def __init__(
        self,
        attr_name: str,
        simple_type: Type[BaseXmlEnum] | Type[BaseSimpleType],
        default: BaseXmlEnum | BaseSimpleType | str | bool | None = None,
    ):
        super().__init__(attr_name, simple_type)
        self._default = default

    @property
    def _docstring(self):
        """String to use as `__doc__` attribute of attribute property."""
        docstring = (
            f"{self._simple_type.__name__} type-converted value of"
            f" ``{self._attr_name}`` attribute, or |None| (or specified default"
            f" value) if not present. Assigning the default value causes the"
            f" attribute to be removed from the element."
        )
        return docstring

    @property
    def _getter(
        self,
    ) -> Callable[[BaseOxmlElement], Any | None]:
        """Function suitable for `__get__()` method on attribute property descriptor."""

        def get_attr_value(
            obj: BaseOxmlElement,
        ) -> Any | None:
            raw_value = obj.get(self._clark_name)
            # -- an absent attribute reads as the default --
            return self._default if raw_value is None else self._simple_type.from_xml(raw_value)

        get_attr_value.__doc__ = self._docstring
        return get_attr_value

    @property
    def _setter(self) -> Callable[[BaseOxmlElement, Any], None]:
        """Function suitable for `__set__()` method on attribute property descriptor."""

        def set_attr_value(obj: BaseOxmlElement, value: Any | None):
            # -- None and the default value are both represented by leaving the
            # -- attribute off; so is a value whose XML form is None
            str_value = None
            if not (value is None or value == self._default):
                str_value = self._simple_type.to_xml(value)

            if str_value is not None:
                obj.set(self._clark_name, str_value)
                return

            if self._clark_name in obj.attrib:
                del obj.attrib[self._clark_name]

        return set_attr_value
class RequiredAttribute(BaseAttribute):
    """Declares an XML attribute that must be present on its element.

    There is no default value: reading returns the converted attribute value, and
    |InvalidXmlError| is raised when the attribute is missing. Assignment never removes
    the attribute; a value the simple type converts to |None| raises |ValueError|.
    """

    @property
    def _docstring(self):
        """Text installed as `__doc__` on the generated reader function."""
        type_name = self._simple_type.__name__
        return "%s type-converted value of ``%s`` attribute." % (type_name, self._attr_name)

    @property
    def _getter(self) -> Callable[[BaseOxmlElement], Any]:
        """Reader function for the "get" side of the attribute property."""

        def get_attr_value(obj: BaseOxmlElement) -> Any | None:
            raw = obj.get(self._clark_name)
            if raw is not None:
                return self._simple_type.from_xml(raw)
            # -- a required attribute that is absent means the XML is invalid --
            raise InvalidXmlError(
                "required '%s' attribute not present on element %s" % (self._attr_name, obj.tag)
            )

        get_attr_value.__doc__ = self._docstring
        return get_attr_value

    @property
    def _setter(self) -> Callable[[BaseOxmlElement, Any], None]:
        """Writer function for the "set" side of the attribute property."""

        def set_attr_value(obj: BaseOxmlElement, value: Any):
            xml_text = self._simple_type.to_xml(value)
            if xml_text is not None:
                obj.set(self._clark_name, xml_text)
                return
            # -- the simple type produced no string, so there is nothing valid to write --
            raise ValueError(f"cannot assign {value} to this required attribute")

        return set_attr_value
class _BaseChildElement:
    """Shared machinery for the child-element declaration classes.

    Each subclass models one cardinality (e.g. ZeroOrOne, ZeroOrMore) and combines the
    `_add_*()` helpers defined here to install accessor members on an element class.
    """

    def __init__(self, nsptagname: str, successors: tuple[str, ...] = ()):
        super().__init__()
        self._successors = successors
        self._nsptagname = nsptagname

    def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None:
        """Remember the target class and property name; subclasses install members."""
        self._prop_name = prop_name
        self._element_cls = element_cls

    def _add_adder(self):
        """Install the private ``_add_x()`` method on the element class."""

        def _add_child(obj: BaseOxmlElement, **attrs: Any):
            child = getattr(obj, self._new_method_name)()
            # -- keyword arguments are applied as attributes of the new child --
            for name, value in attrs.items():
                setattr(child, name, value)
            getattr(obj, self._insert_method_name)(child)
            return child

        _add_child.__doc__ = (
            "Add a new ``<%s>`` child element unconditionally, inserted in "
            "the correct sequence." % self._nsptagname
        )
        self._add_to_class(self._add_method_name, _add_child)

    def _add_creator(self):
        """Install the ``_new_{prop_name}()`` method, which builds a bare child element."""
        factory = self._creator
        factory.__doc__ = (
            'Return a "loose", newly created ``<%s>`` element having no '
            "attributes, text, or children." % self._nsptagname
        )
        self._add_to_class(self._new_method_name, factory)

    def _add_getter(self):
        """Install the read-only ``{prop_name}`` property on the element class."""
        # -- set unconditionally so the property replaces the declaration attribute --
        setattr(self._element_cls, self._prop_name, property(self._getter))

    def _add_inserter(self):
        """Install the private ``_insert_x()`` method on the element class."""

        def _insert_child(obj: BaseOxmlElement, child: BaseOxmlElement):
            # -- placed ahead of the first successor found, else appended --
            obj.insert_element_before(child, *self._successors)
            return child

        _insert_child.__doc__ = (
            "Return the passed ``<%s>`` element after inserting it as a "
            "child in the correct sequence." % self._nsptagname
        )
        self._add_to_class(self._insert_method_name, _insert_child)

    def _add_list_getter(self):
        """Install the read-only ``{prop_name}_lst`` property on the element class."""
        list_property = property(self._list_getter)
        setattr(self._element_cls, "%s_lst" % self._prop_name, list_property)

    @lazyproperty
    def _add_method_name(self):
        """Name of the private adder method, e.g. ``_add_x``."""
        return "_add_%s" % self._prop_name

    def _add_public_adder(self):
        """Install the public ``add_x()`` method on the element class."""

        def add_child(obj: BaseOxmlElement):
            # -- simply delegates to the private adder, passing no attributes --
            return getattr(obj, self._add_method_name)()

        add_child.__doc__ = (
            "Add a new ``<%s>`` child element unconditionally, inserted in "
            "the correct sequence." % self._nsptagname
        )
        self._add_to_class(self._public_add_method_name, add_child)

    def _add_to_class(self, name: str, method: Callable[..., Any]):
        """Attach `method` to the element class as `name` unless `name` already exists."""
        if not hasattr(self._element_cls, name):
            setattr(self._element_cls, name, method)

    @property
    def _creator(self) -> Callable[[BaseOxmlElement], BaseOxmlElement]:
        """Function that builds a new, empty element for this child's tag."""
        # -- imported at call time rather than module level (presumably to avoid an
        # -- import cycle -- TODO confirm)
        from docx.oxml.parser import OxmlElement

        def new_child_element(obj: BaseOxmlElement):
            return OxmlElement(self._nsptagname)

        return new_child_element

    @property
    def _getter(self):
        """Function for the "get" side of the child-element property.

        The default implementation returns the matching child element, or |None| when
        the element has no such child.
        """

        def get_child_element(obj: BaseOxmlElement):
            clark_name = qn(self._nsptagname)
            return obj.find(clark_name)

        get_child_element.__doc__ = (
            "``<%s>`` child element or |None| if not present." % self._nsptagname
        )
        return get_child_element

    @lazyproperty
    def _insert_method_name(self):
        """Name of the private inserter method, e.g. ``_insert_x``."""
        return "_insert_%s" % self._prop_name

    @property
    def _list_getter(self):
        """Function for the "get" side of the ``{prop_name}_lst`` property."""

        def get_child_element_list(obj: BaseOxmlElement):
            clark_name = qn(self._nsptagname)
            return obj.findall(clark_name)

        get_child_element_list.__doc__ = (
            "A list containing each of the ``<%s>`` child elements, in the "
            "order they appear." % self._nsptagname
        )
        return get_child_element_list

    @lazyproperty
    def _public_add_method_name(self):
        """Name of the public adder method, e.g. ``add_x``.

        This is the client-facing API for a repeating element. A subclass may override
        it to offer a friendlier method with domain-appropriate parameter names.
        """
        return "add_%s" % self._prop_name

    @lazyproperty
    def _remove_method_name(self):
        """Name of the private remover method, e.g. ``_remove_x``."""
        return "_remove_%s" % self._prop_name

    @lazyproperty
    def _new_method_name(self):
        """Name of the private creator method, e.g. ``_new_x``."""
        return "_new_%s" % self._prop_name
class Choice(_BaseChildElement):
    """Declares one member of a choice group, of which only one may be a child."""

    @property
    def nsptagname(self):
        """Namespace-prefixed tag name of this group member."""
        return self._nsptagname

    def populate_class_members(  # pyright: ignore[reportIncompatibleMethodOverride]
        self,
        element_cls: MetaOxmlElement,
        group_prop_name: str,
        successors: tuple[str, ...],
    ) -> None:
        """Install this member's accessors on `element_cls`."""
        self._element_cls = element_cls
        self._group_prop_name = group_prop_name
        self._successors = successors

        installers = (
            self._add_getter,
            self._add_creator,
            self._add_inserter,
            self._add_adder,
            self._add_get_or_change_to_method,
        )
        for install in installers:
            install()

    def _add_get_or_change_to_method(self):
        """Install the ``get_or_change_to_x()`` method on the element class."""

        def get_or_change_to_child(obj: BaseOxmlElement):
            existing = getattr(obj, self._prop_name)
            if existing is not None:
                return existing
            # -- another member may occupy the group; clear the group, then add ours --
            getattr(obj, self._remove_group_method_name)()
            return getattr(obj, self._add_method_name)()

        get_or_change_to_child.__doc__ = (
            "Return the ``<%s>`` child, replacing any other group element if found."
        ) % self._nsptagname
        self._add_to_class(self._get_or_change_to_method_name, get_or_change_to_child)

    @property
    def _prop_name(self):
        """Property name taken from the tag's local part, e.g. a:schemeClr -> schemeClr."""
        tag = self._nsptagname
        _, colon, local_part = tag.partition(":")
        # -- a tag without a prefix is used whole --
        return local_part if colon else tag

    @lazyproperty
    def _get_or_change_to_method_name(self):
        """Name of the installed method, e.g. ``get_or_change_to_x``."""
        return "get_or_change_to_%s" % self._prop_name

    @lazyproperty
    def _remove_group_method_name(self):
        """Name of the remover method for the whole choice group."""
        return "_remove_%s" % self._group_prop_name
class OneAndOnlyOne(_BaseChildElement):
    """Declares a child element that is required to be present."""

    def __init__(self, nsptagname: str):
        # -- no successors are passed: this declaration installs only a getter --
        super().__init__(nsptagname, ())

    def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None:
        """Install the read-only child property on `element_cls`."""
        super().populate_class_members(element_cls, prop_name)
        self._add_getter()

    @property
    def _getter(self):
        """Function for the "get" side of the property; raises if the child is absent."""

        def get_child_element(obj: BaseOxmlElement):
            child = obj.find(qn(self._nsptagname))
            if child is not None:
                return child
            raise InvalidXmlError(
                "required ``<%s>`` child element not present" % self._nsptagname
            )

        get_child_element.__doc__ = "Required ``<%s>`` child element." % self._nsptagname
        return get_child_element
class OneOrMore(_BaseChildElement):
    """Declares a repeating child element that must occur at least once."""

    def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None:
        """Install the list getter, creator, inserter and adders on `element_cls`."""
        super().populate_class_members(element_cls, prop_name)

        installers = (
            self._add_list_getter,
            self._add_creator,
            self._add_inserter,
            self._add_adder,
            self._add_public_adder,
        )
        for install in installers:
            install()

        # -- no singular property is installed, so drop the class attribute of that name
        # -- (presumably the declaration object itself -- TODO confirm)
        delattr(element_cls, prop_name)
class ZeroOrMore(_BaseChildElement):
    """Declares a repeating child element that may be absent entirely."""

    def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None:
        """Install the list getter, creator, inserter and adders on `element_cls`."""
        super().populate_class_members(element_cls, prop_name)

        installers = (
            self._add_list_getter,
            self._add_creator,
            self._add_inserter,
            self._add_adder,
            self._add_public_adder,
        )
        for install in installers:
            install()

        # -- no singular property is installed, so drop the class attribute of that name
        # -- (presumably the declaration object itself -- TODO confirm)
        delattr(element_cls, prop_name)
class ZeroOrOne(_BaseChildElement):
    """Declares a child element that may appear at most once."""

    def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None:
        """Install getter, creator, inserter, adder, get-or-add and remover members."""
        super().populate_class_members(element_cls, prop_name)

        installers = (
            self._add_getter,
            self._add_creator,
            self._add_inserter,
            self._add_adder,
            self._add_get_or_adder,
            self._add_remover,
        )
        for install in installers:
            install()

    def _add_get_or_adder(self):
        """Install the ``get_or_add_x()`` method on the element class."""

        def get_or_add_child(obj: BaseOxmlElement):
            existing = getattr(obj, self._prop_name)
            if existing is not None:
                return existing
            # -- not present yet, so create and insert it via the private adder --
            return getattr(obj, self._add_method_name)()

        get_or_add_child.__doc__ = (
            "Return the ``<%s>`` child element, newly added if not present."
        ) % self._nsptagname
        self._add_to_class(self._get_or_add_method_name, get_or_add_child)

    def _add_remover(self):
        """Install the private ``_remove_x()`` method on the element class."""

        def _remove_child(obj: BaseOxmlElement):
            obj.remove_all(self._nsptagname)

        _remove_child.__doc__ = "Remove all ``<%s>`` child elements." % self._nsptagname
        self._add_to_class(self._remove_method_name, _remove_child)

    @lazyproperty
    def _get_or_add_method_name(self):
        """Name of the installed method, e.g. ``get_or_add_x``."""
        return "get_or_add_%s" % self._prop_name
class ZeroOrOneChoice(_BaseChildElement):
    """Corresponds to an ``EG_*`` element group of which at most one member may appear
    as a child."""

    def __init__(self, choices: Sequence[Choice], successors: tuple[str, ...] = ()):
        # -- the base initializer is not invoked: a group has no single tag name --
        self._successors = successors
        self._choices = choices

    def populate_class_members(self, element_cls: MetaOxmlElement, prop_name: str) -> None:
        """Install the group getter, each member's accessors, and the group remover."""
        super().populate_class_members(element_cls, prop_name)
        self._add_choice_getter()
        for member in self._choices:
            member.populate_class_members(element_cls, self._prop_name, self._successors)
        self._add_group_remover()

    def _add_choice_getter(self):
        """Install the read-only ``{prop_name}`` property, which returns whichever
        member of the group is present, or |None| when none is."""
        # -- set unconditionally so the property replaces the declaration attribute --
        setattr(self._element_cls, self._prop_name, property(self._choice_getter))

    def _add_group_remover(self):
        """Install the ``_remove_eg_x()`` method for this choice group."""

        def _remove_choice_group(obj: BaseOxmlElement):
            for member_tag in self._member_nsptagnames:
                obj.remove_all(member_tag)

        _remove_choice_group.__doc__ = "Remove the current choice group child element if present."
        self._add_to_class(self._remove_choice_group_method_name, _remove_choice_group)

    @property
    def _choice_getter(self):
        """Function for the "get" side of the group property."""

        def get_group_member_element(obj: BaseOxmlElement):
            member_tags = self._member_nsptagnames
            return obj.first_child_found_in(*member_tags)

        get_group_member_element.__doc__ = (
            "Return the child element belonging to this element group, or |None| if "
            "no member child is present."
        )
        return get_group_member_element

    @lazyproperty
    def _member_nsptagnames(self):
        """Namespace-prefixed tag names, one per member of this choice group."""
        return [member.nsptagname for member in self._choices]

    @lazyproperty
    def _remove_choice_group_method_name(self):
        """Name of the group remover method, e.g. ``_remove_eg_x``."""
        return "_remove_%s" % self._prop_name
# -- lxml typing isn't quite right here, just ignore this error on _Element --
class BaseOxmlElement(etree.ElementBase, metaclass=MetaOxmlElement):
    """Common base for every custom element class.

    Gathers the behaviors shared by all custom elements in a single place.
    """

    def __repr__(self):
        details = (self.__class__.__name__, self._nsptag, id(self))
        return "<%s '<%s>' at 0x%0x>" % details

    def first_child_found_in(self, *tagnames: str) -> _Element | None:
        """Return the child for the earliest entry in `tagnames` that has one, else None.

        The search follows the order of `tagnames`, not the order of the children.
        """
        for tagname in tagnames:
            found = self.find(qn(tagname))
            if found is None:
                continue
            return found
        return None

    def insert_element_before(self, elm: ElementBase, *tagnames: str):
        """Insert `elm` ahead of the first child found among `tagnames` and return it.

        `elm` is appended as the last child when no such child exists.
        """
        successor = self.first_child_found_in(*tagnames)
        if successor is None:
            self.append(elm)
        else:
            successor.addprevious(elm)
        return elm

    def remove_all(self, *tagnames: str) -> None:
        """Remove every child whose tag name (e.g. "a:p") is in `tagnames`."""
        for tagname in tagnames:
            for child in self.findall(qn(tagname)):
                self.remove(child)

    @property
    def xml(self) -> str:
        """XML text of this element as produced by `serialize_for_reading()`.

        Intended for testing; described upstream as pretty-printed and lacking an XML
        declaration.
        """
        return serialize_for_reading(self)

    def xpath(self, xpath_str: str) -> Any:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Evaluate `xpath_str` using the module's `nsmap` as the namespace mapping.

        Overrides the `lxml` method so callers need not pass the mapping themselves.
        """
        return super().xpath(xpath_str, namespaces=nsmap)

    @property
    def _nsptag(self) -> str:
        """This element's tag converted from Clark notation by `NamespacePrefixedTag`."""
        return NamespacePrefixedTag.from_clark_name(self.tag)