Initial commit (Clean history)
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
331
path/to/venv/lib/python3.12/site-packages/docx/oxml/text/font.py
Normal file
331
path/to/venv/lib/python3.12/site-packages/docx/oxml/text/font.py
Normal file
@@ -0,0 +1,331 @@
|
||||
# pyright: reportAssignmentType=false
|
||||
|
||||
"""Custom element classes related to run properties (font)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Callable
|
||||
|
||||
from docx.enum.dml import MSO_THEME_COLOR
|
||||
from docx.enum.text import WD_COLOR_INDEX, WD_UNDERLINE
|
||||
from docx.oxml.ns import nsdecls
|
||||
from docx.oxml.parser import parse_xml
|
||||
from docx.oxml.simpletypes import (
|
||||
ST_HexColor,
|
||||
ST_HpsMeasure,
|
||||
ST_String,
|
||||
ST_VerticalAlignRun,
|
||||
)
|
||||
from docx.oxml.xmlchemy import (
|
||||
BaseOxmlElement,
|
||||
OptionalAttribute,
|
||||
RequiredAttribute,
|
||||
ZeroOrOne,
|
||||
)
|
||||
from docx.shared import RGBColor
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.shared import CT_OnOff, CT_String
|
||||
from docx.shared import Length
|
||||
|
||||
|
||||
class CT_Color(BaseOxmlElement):
    """`w:color` element, specifying the color of a font and perhaps other objects."""

    # -- required `w:val` attribute, declared with the `ST_HexColor` simple-type --
    val: RGBColor | str = RequiredAttribute("w:val", ST_HexColor)
    # -- optional `w:themeColor` attribute, declared with the `MSO_THEME_COLOR` enum --
    themeColor: MSO_THEME_COLOR | None = OptionalAttribute("w:themeColor", MSO_THEME_COLOR)
|
||||
|
||||
|
||||
class CT_Fonts(BaseOxmlElement):
    """`<w:rFonts>` element.

    Specifies typeface name for the various language types.
    """

    # -- optional `w:ascii` attribute (typeface name as a string) --
    ascii: str | None = OptionalAttribute("w:ascii", ST_String)
    # -- optional `w:hAnsi` attribute (typeface name as a string) --
    hAnsi: str | None = OptionalAttribute("w:hAnsi", ST_String)
|
||||
|
||||
|
||||
class CT_Highlight(BaseOxmlElement):
    """`w:highlight` element, specifying font highlighting/background color."""

    # -- required `w:val` attribute, declared with the `WD_COLOR_INDEX` enum --
    val: WD_COLOR_INDEX = RequiredAttribute("w:val", WD_COLOR_INDEX)
|
||||
|
||||
|
||||
class CT_HpsMeasure(BaseOxmlElement):
    """Used for `<w:sz>` element and others, specifying font size in half-points."""

    # -- required `w:val` attribute, declared with the `ST_HpsMeasure` simple-type and
    # -- annotated as a `Length` on the Python side
    val: Length = RequiredAttribute("w:val", ST_HpsMeasure)
|
||||
|
||||
|
||||
class CT_RPr(BaseOxmlElement):
    """`<w:rPr>` element, containing the properties for a run."""

    # -- Declarations for the type-checker only; no values are assigned here. These
    # -- helpers are presumably supplied at runtime by the `ZeroOrOne` child-element
    # -- declarations below (confirm against `docx.oxml.xmlchemy`).
    get_or_add_color: Callable[[], CT_Color]
    get_or_add_highlight: Callable[[], CT_Highlight]
    get_or_add_rFonts: Callable[[], CT_Fonts]
    get_or_add_sz: Callable[[], CT_HpsMeasure]
    get_or_add_vertAlign: Callable[[], CT_VerticalAlignRun]
    _add_rStyle: Callable[..., CT_String]
    _add_u: Callable[[], CT_Underline]
    _remove_color: Callable[[], None]
    _remove_highlight: Callable[[], None]
    _remove_rFonts: Callable[[], None]
    _remove_rStyle: Callable[[], None]
    _remove_sz: Callable[[], None]
    _remove_u: Callable[[], None]
    _remove_vertAlign: Callable[[], None]

    # -- Child-element tag names in sequence. Each declaration below passes the slice
    # -- of this tuple that follows its own tag as `successors`, so the slice index of
    # -- a declaration is always (position of its tag in this tuple) + 1.
    _tag_seq = (
        "w:rStyle",
        "w:rFonts",
        "w:b",
        "w:bCs",
        "w:i",
        "w:iCs",
        "w:caps",
        "w:smallCaps",
        "w:strike",
        "w:dstrike",
        "w:outline",
        "w:shadow",
        "w:emboss",
        "w:imprint",
        "w:noProof",
        "w:snapToGrid",
        "w:vanish",
        "w:webHidden",
        "w:color",
        "w:spacing",
        "w:w",
        "w:kern",
        "w:position",
        "w:sz",
        "w:szCs",
        "w:highlight",
        "w:u",
        "w:effect",
        "w:bdr",
        "w:shd",
        "w:fitText",
        "w:vertAlign",
        "w:rtl",
        "w:cs",
        "w:em",
        "w:lang",
        "w:eastAsianLayout",
        "w:specVanish",
        "w:oMath",
    )
    rStyle: CT_String | None = ZeroOrOne("w:rStyle", successors=_tag_seq[1:])
    rFonts: CT_Fonts | None = ZeroOrOne("w:rFonts", successors=_tag_seq[2:])
    b: CT_OnOff | None = ZeroOrOne("w:b", successors=_tag_seq[3:])
    bCs = ZeroOrOne("w:bCs", successors=_tag_seq[4:])
    i = ZeroOrOne("w:i", successors=_tag_seq[5:])
    iCs = ZeroOrOne("w:iCs", successors=_tag_seq[6:])
    caps = ZeroOrOne("w:caps", successors=_tag_seq[7:])
    smallCaps = ZeroOrOne("w:smallCaps", successors=_tag_seq[8:])
    strike = ZeroOrOne("w:strike", successors=_tag_seq[9:])
    dstrike = ZeroOrOne("w:dstrike", successors=_tag_seq[10:])
    outline = ZeroOrOne("w:outline", successors=_tag_seq[11:])
    shadow = ZeroOrOne("w:shadow", successors=_tag_seq[12:])
    emboss = ZeroOrOne("w:emboss", successors=_tag_seq[13:])
    imprint = ZeroOrOne("w:imprint", successors=_tag_seq[14:])
    noProof = ZeroOrOne("w:noProof", successors=_tag_seq[15:])
    snapToGrid = ZeroOrOne("w:snapToGrid", successors=_tag_seq[16:])
    vanish = ZeroOrOne("w:vanish", successors=_tag_seq[17:])
    webHidden = ZeroOrOne("w:webHidden", successors=_tag_seq[18:])
    color: CT_Color | None = ZeroOrOne("w:color", successors=_tag_seq[19:])
    sz: CT_HpsMeasure | None = ZeroOrOne("w:sz", successors=_tag_seq[24:])
    highlight: CT_Highlight | None = ZeroOrOne("w:highlight", successors=_tag_seq[26:])
    u: CT_Underline | None = ZeroOrOne("w:u", successors=_tag_seq[27:])
    vertAlign: CT_VerticalAlignRun | None = ZeroOrOne("w:vertAlign", successors=_tag_seq[32:])
    rtl = ZeroOrOne("w:rtl", successors=_tag_seq[33:])
    cs = ZeroOrOne("w:cs", successors=_tag_seq[34:])
    specVanish = ZeroOrOne("w:specVanish", successors=_tag_seq[38:])
    oMath = ZeroOrOne("w:oMath", successors=_tag_seq[39:])
    # -- only needed while the class body executes; keep it out of the class namespace --
    del _tag_seq

    def _new_color(self):
        """Override metaclass method to set `w:color/@val` to RGB black on create."""
        return parse_xml(f'<w:color {nsdecls("w")} w:val="000000"/>')

    @property
    def highlight_val(self) -> WD_COLOR_INDEX | None:
        """Value of `./w:highlight/@val`.

        Specifies font's highlight color, or `None` if the text is not highlighted
        (no `w:highlight` child is present).
        """
        highlight = self.highlight
        if highlight is None:
            return None
        return highlight.val

    @highlight_val.setter
    def highlight_val(self, value: WD_COLOR_INDEX | None) -> None:
        """Set `./w:highlight/@val` to `value`; |None| removes the `w:highlight` child."""
        if value is None:
            self._remove_highlight()
            return
        highlight = self.get_or_add_highlight()
        highlight.val = value

    @property
    def rFonts_ascii(self) -> str | None:
        """The value of `w:rFonts/@w:ascii` or |None| if not present.

        Represents the assigned typeface name. The rFonts element also specifies other
        special-case typeface names; this method handles the case where just the common
        name is required.
        """
        rFonts = self.rFonts
        if rFonts is None:
            return None
        return rFonts.ascii

    @rFonts_ascii.setter
    def rFonts_ascii(self, value: str | None) -> None:
        """Set `w:rFonts/@w:ascii` to `value`, adding `w:rFonts` if necessary.

        Assigning |None| removes the entire `w:rFonts` element, not just the
        `w:ascii` attribute.
        """
        if value is None:
            self._remove_rFonts()
            return
        rFonts = self.get_or_add_rFonts()
        rFonts.ascii = value

    @property
    def rFonts_hAnsi(self) -> str | None:
        """The value of `w:rFonts/@w:hAnsi` or |None| if not present."""
        rFonts = self.rFonts
        if rFonts is None:
            return None
        return rFonts.hAnsi

    @rFonts_hAnsi.setter
    def rFonts_hAnsi(self, value: str | None) -> None:
        """Set `w:rFonts/@w:hAnsi` to `value`, adding `w:rFonts` if necessary.

        Unlike `rFonts_ascii`, assigning |None| leaves the `w:rFonts` element in place
        and only assigns |None| to its `hAnsi` attribute.
        """
        # -- nothing to clear and no reason to create an empty `w:rFonts` --
        if value is None and self.rFonts is None:
            return
        rFonts = self.get_or_add_rFonts()
        rFonts.hAnsi = value

    @property
    def style(self) -> str | None:
        """String in `./w:rStyle/@val`, or None if `w:rStyle` is not present."""
        rStyle = self.rStyle
        if rStyle is None:
            return None
        return rStyle.val

    @style.setter
    def style(self, style: str | None) -> None:
        """Set `./w:rStyle/@val` to `style`, adding the `w:rStyle` element if necessary.

        If `style` is |None|, remove `w:rStyle` element if present.
        """
        if style is None:
            self._remove_rStyle()
        elif self.rStyle is None:
            self._add_rStyle(val=style)
        else:
            self.rStyle.val = style

    @property
    def subscript(self) -> bool | None:
        """|True| if `./w:vertAlign/@w:val` is "subscript".

        |False| if `w:vertAlign/@w:val` contains any other value. |None| if
        `w:vertAlign` is not present.
        """
        vertAlign = self.vertAlign
        if vertAlign is None:
            return None
        return vertAlign.val == ST_VerticalAlignRun.SUBSCRIPT

    @subscript.setter
    def subscript(self, value: bool | None) -> None:
        """Turn subscript on (truthy), off (falsy), or remove `w:vertAlign` (|None|)."""
        if value is None:
            self._remove_vertAlign()
        elif value:
            self.get_or_add_vertAlign().val = ST_VerticalAlignRun.SUBSCRIPT
        # -- value is falsy: only remove `w:vertAlign` when it currently says subscript,
        # -- so an existing superscript setting is left untouched
        elif self.vertAlign is not None and self.vertAlign.val == ST_VerticalAlignRun.SUBSCRIPT:
            self._remove_vertAlign()

    @property
    def superscript(self) -> bool | None:
        """|True| if `w:vertAlign/@w:val` is 'superscript'.

        |False| if `w:vertAlign/@w:val` contains any other value. |None| if
        `w:vertAlign` is not present.
        """
        vertAlign = self.vertAlign
        if vertAlign is None:
            return None
        return vertAlign.val == ST_VerticalAlignRun.SUPERSCRIPT

    @superscript.setter
    def superscript(self, value: bool | None) -> None:
        """Turn superscript on (truthy), off (falsy), or remove `w:vertAlign` (|None|)."""
        if value is None:
            self._remove_vertAlign()
        elif value:
            self.get_or_add_vertAlign().val = ST_VerticalAlignRun.SUPERSCRIPT
        # -- value is falsy: only remove `w:vertAlign` when it currently says
        # -- superscript, so an existing subscript setting is left untouched
        elif self.vertAlign is not None and self.vertAlign.val == ST_VerticalAlignRun.SUPERSCRIPT:
            self._remove_vertAlign()

    @property
    def sz_val(self) -> Length | None:
        """The value of `w:sz/@w:val` or |None| if not present."""
        sz = self.sz
        if sz is None:
            return None
        return sz.val

    @sz_val.setter
    def sz_val(self, value: Length | None) -> None:
        """Set `w:sz/@w:val` to `value`; |None| removes the `w:sz` child."""
        if value is None:
            self._remove_sz()
            return
        sz = self.get_or_add_sz()
        sz.val = value

    @property
    def u_val(self) -> WD_UNDERLINE | None:
        """Value of `w:u/@w:val`, or |None| if the `w:u` child is not present.

        The `val` of the `w:u` element is returned unchanged; this property performs no
        translation of `WD_UNDERLINE` members to `True`/`False`. `CT_Underline.val` is
        declared as an optional attribute, so presumably this is also |None| when
        `w:u` is present without a `w:val` attribute.
        """
        u = self.u
        if u is None:
            return None
        return u.val

    @u_val.setter
    def u_val(self, value: WD_UNDERLINE | None) -> None:
        """Replace any `w:u` child with a new one carrying `value`; |None| just removes."""
        # -- always start from a clean slate; the element is re-created, not updated --
        self._remove_u()
        if value is not None:
            self._add_u().val = value

    def _get_bool_val(self, name: str) -> bool | None:
        """Value of the boolean child stored in attribute `name`, or |None| if absent.

        `name` is the Python attribute name of the child declaration on this class,
        e.g. "b", "i", or "smallCaps" (it is looked up with `getattr()`), not the
        namespace-prefixed tag name.
        """
        element = getattr(self, name)
        if element is None:
            return None
        return element.val

    def _set_bool_val(self, name: str, value: bool | None) -> None:
        """Set the boolean child stored in attribute `name` to `value`.

        `name` is the Python attribute name of the child (e.g. "b"); it is used to
        look up the `_remove_{name}` and `get_or_add_{name}` helpers. |None| removes
        the child element, any other value is assigned to its `val`.
        """
        if value is None:
            getattr(self, f"_remove_{name}")()
            return
        element = getattr(self, f"get_or_add_{name}")()
        element.val = value
|
||||
|
||||
|
||||
class CT_Underline(BaseOxmlElement):
    """`<w:u>` element, specifying the underlining style for a run."""

    # -- optional `w:val` attribute, declared with the `WD_UNDERLINE` enum --
    val: WD_UNDERLINE | None = OptionalAttribute("w:val", WD_UNDERLINE)
|
||||
|
||||
|
||||
class CT_VerticalAlignRun(BaseOxmlElement):
    """`<w:vertAlign>` element, specifying subscript or superscript."""

    # -- required `w:val` attribute, declared with the `ST_VerticalAlignRun` simple-type --
    val: str = RequiredAttribute("w:val", ST_VerticalAlignRun)
|
||||
@@ -0,0 +1,45 @@
|
||||
"""Custom element classes related to hyperlinks (CT_Hyperlink)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, List
|
||||
|
||||
from docx.oxml.simpletypes import ST_OnOff, ST_String, XsdString
|
||||
from docx.oxml.text.run import CT_R
|
||||
from docx.oxml.xmlchemy import (
|
||||
BaseOxmlElement,
|
||||
OptionalAttribute,
|
||||
ZeroOrMore,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
|
||||
|
||||
|
||||
class CT_Hyperlink(BaseOxmlElement):
    """`<w:hyperlink>` element, holding the runs and target of a hyperlink."""

    # -- declared for the type-checker only; no value is assigned here --
    r_lst: List[CT_R]

    # -- optional `r:id` attribute --
    rId: str | None = OptionalAttribute("r:id", XsdString)  # pyright: ignore[reportAssignmentType]
    # -- optional `w:anchor` attribute --
    anchor: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:anchor", ST_String
    )
    # -- optional `w:history` attribute, declared with a default of True --
    history: bool = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:history", ST_OnOff, default=True
    )

    r = ZeroOrMore("w:r")

    @property
    def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
        """Every `w:lastRenderedPageBreak` found in a `w:r` child of this hyperlink."""
        rendered_breaks = self.xpath("./w:r/w:lastRenderedPageBreak")
        return rendered_breaks

    @property
    def text(self) -> str:  # pyright: ignore[reportIncompatibleMethodOverride]
        """Text of this hyperlink as a single string.

        The hyperlink text lives in the `w:r` children; the `text` of each is
        concatenated in document order.
        """
        run_texts = [run.text for run in self.xpath("w:r")]
        return "".join(run_texts)
|
||||
@@ -0,0 +1,278 @@
|
||||
"""Custom element class for rendered page-break (CT_LastRenderedPageBreak)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from docx.oxml.xmlchemy import BaseOxmlElement
|
||||
from docx.shared import lazyproperty
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.text.hyperlink import CT_Hyperlink
|
||||
from docx.oxml.text.paragraph import CT_P
|
||||
|
||||
|
||||
class CT_LastRenderedPageBreak(BaseOxmlElement):
    """`<w:lastRenderedPageBreak>` element, indicating page break inserted by renderer.

    A rendered page-break is one inserted by the renderer when it runs out of room on a
    page. It is an empty element (no attrs or children) and is a child of CT_R, peer to
    CT_Text.

    NOTE: this complex-type name does not exist in the schema, where
    `w:lastRenderedPageBreak` maps to `CT_Empty`. This name was added to give it
    distinguished behavior. CT_Empty is used for many elements.
    """

    @property
    def following_fragment_p(self) -> CT_P:
        """A "loose" `CT_P` containing only the paragraph content after this break.

        Raises `ValueError` if this `w:lastRenderedPageBreak` is not the first rendered
        page-break in its paragraph.

        The returned `CT_P` is a "clone" (deepcopy) of the `w:p` ancestor of this
        page-break with this `w:lastRenderedPageBreak` element and all content preceding
        it removed (the `w:pPr` child is retained). When the break is inside a
        hyperlink, the whole hyperlink is removed as well.

        NOTE: this `w:p` can itself contain one or more `w:lastRenderedPageBreak`
        elements (when the paragraph contained more than one). While this is rare, the
        caller should treat this paragraph the same as other paragraphs and split it if
        necessary in a following step or recursion.
        """
        if self != self._first_lrpb_in_p(self._enclosing_p):
            raise ValueError("only defined on first rendered page-break in paragraph")

        # -- splitting approach is different when break is inside a hyperlink --
        return (
            self._following_frag_in_hlink if self._is_in_hyperlink else self._following_frag_in_run
        )

    @property
    def follows_all_content(self) -> bool:
        """True when this page-break element is the last "content" in the paragraph.

        This is a very uncommon case and may only occur in contrived cases or where
        the XML is edited by hand, but it is not precluded by the spec.
        """
        # -- a page-break inside a hyperlink never meets these criteria (for our
        # -- purposes at least) because it is considered "atomic" and always associated
        # -- with the page it starts on.
        if self._is_in_hyperlink:
            return False

        # -- NOTE(review): the XPath is evaluated on the enclosing paragraph and does
        # -- not check that the matched element is this particular page-break.
        return bool(
            self._enclosing_p.xpath(
                # -- in last run of paragraph --
                "(./w:r)[last()]"
                # -- all page-breaks --
                "/w:lastRenderedPageBreak"
                # -- that are not followed by any content-bearing elements --
                f"[not(following-sibling::*[{self._run_inner_content_xpath}])]"
            )
        )

    @property
    def precedes_all_content(self) -> bool:
        """True when a `w:lastRenderedPageBreak` precedes all paragraph content.

        This is a common case; it occurs whenever the page breaks on an even paragraph
        boundary.
        """
        # -- a page-break inside a hyperlink never meets these criteria because there
        # -- is always part of the hyperlink text before the page-break.
        if self._is_in_hyperlink:
            return False

        # -- NOTE(review): the XPath is evaluated on the enclosing paragraph and does
        # -- not check that the matched element is this particular page-break.
        return bool(
            self._enclosing_p.xpath(
                # -- in first run of paragraph --
                "./w:r[1]"
                # -- all page-breaks --
                "/w:lastRenderedPageBreak"
                # -- that are not preceded by any content-bearing elements --
                f"[not(preceding-sibling::*[{self._run_inner_content_xpath}])]"
            )
        )

    @property
    def preceding_fragment_p(self) -> CT_P:
        """A "loose" `CT_P` containing only the paragraph content before this break.

        Raises `ValueError` if this `w:lastRenderedPageBreak` is not the first rendered
        page-break in its paragraph.

        The returned `CT_P` is a "clone" (deepcopy) of the `w:p` ancestor of this
        page-break with this `w:lastRenderedPageBreak` element and all content
        following it removed. When the break is inside a hyperlink, the whole hyperlink
        is retained and only the content following the hyperlink is removed.
        """
        if self != self._first_lrpb_in_p(self._enclosing_p):
            raise ValueError("only defined on first rendered page-break in paragraph")

        # -- splitting approach is different when break is inside a hyperlink --
        return (
            self._preceding_frag_in_hlink if self._is_in_hyperlink else self._preceding_frag_in_run
        )

    def _enclosing_hyperlink(self, lrpb: CT_LastRenderedPageBreak) -> CT_Hyperlink:
        """The `w:hyperlink` grandparent of `lrpb`.

        Note the lookup is performed on the `lrpb` argument rather than on `self`, so
        it can be applied to the page-break in a cloned paragraph.

        Raises `IndexError` when `lrpb` has a `w:p` grandparent, so only call when
        `._is_in_hyperlink` is True.
        """
        return lrpb.xpath("./parent::w:r/parent::w:hyperlink")[0]

    @property
    def _enclosing_p(self) -> CT_P:
        """The `w:p` element parent or grandparent of this `w:lastRenderedPageBreak`."""
        return self.xpath("./ancestor::w:p[1]")[0]

    def _first_lrpb_in_p(self, p: CT_P) -> CT_LastRenderedPageBreak:
        """The first `w:lastRenderedPageBreak` element in `p`.

        Both page-breaks in a run directly under `p` and those in a run inside a
        hyperlink are considered.

        Raises `ValueError` if there are no rendered page-breaks in `p`.
        """
        lrpbs = p.xpath("./w:r/w:lastRenderedPageBreak | ./w:hyperlink/w:r/w:lastRenderedPageBreak")
        if not lrpbs:
            raise ValueError("no rendered page-breaks in paragraph element")
        return lrpbs[0]

    @lazyproperty
    def _following_frag_in_hlink(self) -> CT_P:
        """Following CT_P fragment when break occurs within a hyperlink.

        Note this is a *partial-function* and raises when `lrpb` is not inside a
        hyperlink.
        """
        if not self._is_in_hyperlink:
            raise ValueError("only defined on a rendered page-break in a hyperlink")

        # -- work on a clone `w:p` so our mutations don't persist --
        p = copy.deepcopy(self._enclosing_p)

        # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self) --
        lrpb = self._first_lrpb_in_p(p)

        # -- locate `w:hyperlink` in which this `w:lastRenderedPageBreak` is found --
        hyperlink = lrpb._enclosing_hyperlink(lrpb)

        # -- delete all w:p inner-content preceding the hyperlink (but not w:pPr) --
        for e in hyperlink.xpath("./preceding-sibling::*[not(self::w:pPr)]"):
            p.remove(e)

        # -- remove the whole hyperlink, it belongs to the preceding-fragment-p --
        hyperlink.getparent().remove(hyperlink)

        # -- that's it, return the remaining fragment of `w:p` clone --
        return p

    @lazyproperty
    def _following_frag_in_run(self) -> CT_P:
        """Following CT_P fragment when break does not occur in a hyperlink.

        Note this is a *partial-function* and raises when `lrpb` is inside a hyperlink.
        """
        if self._is_in_hyperlink:
            raise ValueError("only defined on a rendered page-break not in a hyperlink")

        # -- work on a clone `w:p` so our mutations don't persist --
        p = copy.deepcopy(self._enclosing_p)

        # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self) --
        lrpb = self._first_lrpb_in_p(p)

        # -- locate `w:r` in which this `w:lastRenderedPageBreak` is found --
        enclosing_r = lrpb.xpath("./parent::w:r")[0]

        # -- delete all w:p inner-content preceding that run (but not w:pPr) --
        for e in enclosing_r.xpath("./preceding-sibling::*[not(self::w:pPr)]"):
            p.remove(e)

        # -- then remove all run inner-content preceding this lrpb in its run (but not
        # -- the `w:rPr`) and also remove the page-break itself
        for e in lrpb.xpath("./preceding-sibling::*[not(self::w:rPr)]"):
            enclosing_r.remove(e)
        enclosing_r.remove(lrpb)

        return p

    @lazyproperty
    def _is_in_hyperlink(self) -> bool:
        """True when this page-break is embedded in a hyperlink run."""
        return bool(self.xpath("./parent::w:r/parent::w:hyperlink"))

    @lazyproperty
    def _preceding_frag_in_hlink(self) -> CT_P:
        """Preceding CT_P fragment when break occurs within a hyperlink.

        Note this is a *partial-function* and raises when `lrpb` is not inside a
        hyperlink.
        """
        if not self._is_in_hyperlink:
            raise ValueError("only defined on a rendered page-break in a hyperlink")

        # -- work on a clone `w:p` so our mutations don't persist --
        p = copy.deepcopy(self._enclosing_p)

        # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self) --
        lrpb = self._first_lrpb_in_p(p)

        # -- locate `w:hyperlink` in which this `w:lastRenderedPageBreak` is found --
        hyperlink = lrpb._enclosing_hyperlink(lrpb)

        # -- delete all w:p inner-content following the hyperlink --
        for e in hyperlink.xpath("./following-sibling::*"):
            p.remove(e)

        # -- remove this page-break from inside the hyperlink --
        lrpb.getparent().remove(lrpb)

        # -- that's it, the entire hyperlink goes into the preceding fragment so
        # -- the hyperlink is not "split".
        return p

    @lazyproperty
    def _preceding_frag_in_run(self) -> CT_P:
        """Preceding CT_P fragment when break does not occur in a hyperlink.

        Note this is a *partial-function* and raises when `lrpb` is inside a hyperlink.
        """
        if self._is_in_hyperlink:
            raise ValueError("only defined on a rendered page-break not in a hyperlink")

        # -- work on a clone `w:p` so our mutations don't persist --
        p = copy.deepcopy(self._enclosing_p)

        # -- get this `w:lastRenderedPageBreak` in the cloned `w:p` (not self) --
        lrpb = self._first_lrpb_in_p(p)

        # -- locate `w:r` in which this `w:lastRenderedPageBreak` is found --
        enclosing_r = lrpb.xpath("./parent::w:r")[0]

        # -- delete all `w:p` inner-content following that run --
        for e in enclosing_r.xpath("./following-sibling::*"):
            p.remove(e)

        # -- then delete all `w:r` inner-content following this lrpb in its run and
        # -- also remove the page-break itself
        for e in lrpb.xpath("./following-sibling::*"):
            enclosing_r.remove(e)
        enclosing_r.remove(lrpb)

        return p

    @lazyproperty
    def _run_inner_content_xpath(self) -> str:
        """XPath fragment matching any run inner-content elements.

        Intended for use inside a predicate (`[...]`) applied to a sibling axis, as in
        `follows_all_content` and `precedes_all_content`.
        """
        return (
            "self::w:br"
            " | self::w:cr"
            " | self::w:drawing"
            " | self::w:noBreakHyphen"
            " | self::w:ptab"
            " | self::w:t"
            " | self::w:tab"
        )
|
||||
@@ -0,0 +1,106 @@
|
||||
# pyright: reportPrivateUsage=false
|
||||
|
||||
"""Custom element classes related to paragraphs (CT_P)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Callable, List, cast
|
||||
|
||||
from docx.oxml.parser import OxmlElement
|
||||
from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrMore, ZeroOrOne
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
|
||||
from docx.oxml.section import CT_SectPr
|
||||
from docx.oxml.text.hyperlink import CT_Hyperlink
|
||||
from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
|
||||
from docx.oxml.text.parfmt import CT_PPr
|
||||
from docx.oxml.text.run import CT_R
|
||||
|
||||
|
||||
class CT_P(BaseOxmlElement):
    """`<w:p>` element, holding the properties and inner content of a paragraph."""

    # -- declared for the type-checker only; no values are assigned here --
    add_r: Callable[[], CT_R]
    get_or_add_pPr: Callable[[], CT_PPr]
    hyperlink_lst: List[CT_Hyperlink]
    r_lst: List[CT_R]

    pPr: CT_PPr | None = ZeroOrOne("w:pPr")  # pyright: ignore[reportAssignmentType]
    hyperlink = ZeroOrMore("w:hyperlink")
    r = ZeroOrMore("w:r")

    def add_p_before(self) -> CT_P:
        """Insert a new, empty `<w:p>` as the previous sibling of this one and return it."""
        preceding_p = cast(CT_P, OxmlElement("w:p"))
        self.addprevious(preceding_p)
        return preceding_p

    @property
    def alignment(self) -> WD_PARAGRAPH_ALIGNMENT | None:
        """The `jc_val` of the `w:pPr` child, or |None| when there is no `w:pPr`."""
        paragraph_props = self.pPr
        return None if paragraph_props is None else paragraph_props.jc_val

    @alignment.setter
    def alignment(self, value: WD_PARAGRAPH_ALIGNMENT):
        # -- `w:pPr` is created on demand so there is always somewhere to store it --
        self.get_or_add_pPr().jc_val = value

    def clear_content(self):
        """Remove every child element other than `<w:pPr>`."""
        removable = self.xpath("./*[not(self::w:pPr)]")
        for element in removable:
            self.remove(element)

    @property
    def inner_content_elements(self) -> List[CT_R | CT_Hyperlink]:
        """The `w:r` and `w:hyperlink` children of this `w:p`, in document order."""
        content_elements = self.xpath("./w:r | ./w:hyperlink")
        return content_elements

    @property
    def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
        """All `w:lastRenderedPageBreak` elements in this paragraph.

        A rendered page-break usually sits in a run that is a direct child of the
        paragraph, but it may also sit in a run nested in a hyperlink. Both kinds
        are included.
        """
        lrpb_xpath = "./w:r/w:lastRenderedPageBreak | ./w:hyperlink/w:r/w:lastRenderedPageBreak"
        return self.xpath(lrpb_xpath)

    def set_sectPr(self, sectPr: CT_SectPr):
        """Make `sectPr` the section-properties of this paragraph's `w:pPr`.

        Any existing `w:sectPr` is removed first; `w:pPr` is added when not present.
        """
        paragraph_props = self.get_or_add_pPr()
        paragraph_props._remove_sectPr()
        paragraph_props._insert_sectPr(sectPr)

    @property
    def style(self) -> str | None:
        """The `style` value of the `w:pPr` child.

        |None| when this paragraph has no `w:pPr` child.
        """
        paragraph_props = self.pPr
        return None if paragraph_props is None else paragraph_props.style

    @style.setter
    def style(self, style: str | None):
        # -- `w:pPr` is created on demand, even when `style` is None --
        self.get_or_add_pPr().style = style

    @property
    def text(self):  # pyright: ignore[reportIncompatibleMethodOverride]
        """Text of this paragraph as a single string.

        The `text` of each `w:r` and `w:hyperlink` child is concatenated in document
        order.
        """
        fragments = [element.text for element in self.xpath("w:r | w:hyperlink")]
        return "".join(fragments)

    def _insert_pPr(self, pPr: CT_PPr) -> CT_PPr:
        """Insert `pPr` as the first child of this paragraph and return it."""
        self.insert(0, pPr)
        return pPr
|
||||
@@ -0,0 +1,392 @@
|
||||
"""Custom element classes related to paragraph properties (CT_PPr)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Callable
|
||||
|
||||
from docx.enum.text import (
|
||||
WD_ALIGN_PARAGRAPH,
|
||||
WD_LINE_SPACING,
|
||||
WD_TAB_ALIGNMENT,
|
||||
WD_TAB_LEADER,
|
||||
)
|
||||
from docx.oxml.shared import CT_DecimalNumber
|
||||
from docx.oxml.simpletypes import ST_SignedTwipsMeasure, ST_TwipsMeasure
|
||||
from docx.oxml.xmlchemy import (
|
||||
BaseOxmlElement,
|
||||
OneOrMore,
|
||||
OptionalAttribute,
|
||||
RequiredAttribute,
|
||||
ZeroOrOne,
|
||||
)
|
||||
from docx.shared import Length
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.section import CT_SectPr
|
||||
from docx.oxml.shared import CT_String
|
||||
|
||||
|
||||
class CT_Ind(BaseOxmlElement):
    """``<w:ind>`` element, specifying paragraph indentation."""

    # -- `w:left` and `w:right` are declared with the signed twips simple-type --
    left: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:left", ST_SignedTwipsMeasure
    )
    right: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:right", ST_SignedTwipsMeasure
    )
    # -- `w:firstLine` and `w:hanging` are declared with the (unsigned-named)
    # -- `ST_TwipsMeasure` simple-type
    firstLine: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:firstLine", ST_TwipsMeasure
    )
    hanging: Length | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:hanging", ST_TwipsMeasure
    )
|
||||
|
||||
|
||||
class CT_Jc(BaseOxmlElement):
    """``<w:jc>`` element, specifying paragraph justification."""

    # -- required `w:val` attribute, declared with the `WD_ALIGN_PARAGRAPH` enum --
    val: WD_ALIGN_PARAGRAPH = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
        "w:val", WD_ALIGN_PARAGRAPH
    )
|
||||
|
||||
|
||||
class CT_PPr(BaseOxmlElement):
|
||||
"""``<w:pPr>`` element, containing the properties for a paragraph."""
|
||||
|
||||
get_or_add_ind: Callable[[], CT_Ind]
|
||||
get_or_add_pStyle: Callable[[], CT_String]
|
||||
get_or_add_sectPr: Callable[[], CT_SectPr]
|
||||
_insert_sectPr: Callable[[CT_SectPr], None]
|
||||
_remove_pStyle: Callable[[], None]
|
||||
_remove_sectPr: Callable[[], None]
|
||||
|
||||
_tag_seq = (
|
||||
"w:pStyle",
|
||||
"w:keepNext",
|
||||
"w:keepLines",
|
||||
"w:pageBreakBefore",
|
||||
"w:framePr",
|
||||
"w:widowControl",
|
||||
"w:numPr",
|
||||
"w:suppressLineNumbers",
|
||||
"w:pBdr",
|
||||
"w:shd",
|
||||
"w:tabs",
|
||||
"w:suppressAutoHyphens",
|
||||
"w:kinsoku",
|
||||
"w:wordWrap",
|
||||
"w:overflowPunct",
|
||||
"w:topLinePunct",
|
||||
"w:autoSpaceDE",
|
||||
"w:autoSpaceDN",
|
||||
"w:bidi",
|
||||
"w:adjustRightInd",
|
||||
"w:snapToGrid",
|
||||
"w:spacing",
|
||||
"w:ind",
|
||||
"w:contextualSpacing",
|
||||
"w:mirrorIndents",
|
||||
"w:suppressOverlap",
|
||||
"w:jc",
|
||||
"w:textDirection",
|
||||
"w:textAlignment",
|
||||
"w:textboxTightWrap",
|
||||
"w:outlineLvl",
|
||||
"w:divId",
|
||||
"w:cnfStyle",
|
||||
"w:rPr",
|
||||
"w:sectPr",
|
||||
"w:pPrChange",
|
||||
)
|
||||
pStyle: CT_String | None = ZeroOrOne( # pyright: ignore[reportAssignmentType]
|
||||
"w:pStyle", successors=_tag_seq[1:]
|
||||
)
|
||||
keepNext = ZeroOrOne("w:keepNext", successors=_tag_seq[2:])
|
||||
keepLines = ZeroOrOne("w:keepLines", successors=_tag_seq[3:])
|
||||
pageBreakBefore = ZeroOrOne("w:pageBreakBefore", successors=_tag_seq[4:])
|
||||
widowControl = ZeroOrOne("w:widowControl", successors=_tag_seq[6:])
|
||||
numPr = ZeroOrOne("w:numPr", successors=_tag_seq[7:])
|
||||
tabs = ZeroOrOne("w:tabs", successors=_tag_seq[11:])
|
||||
spacing = ZeroOrOne("w:spacing", successors=_tag_seq[22:])
|
||||
ind: CT_Ind | None = ZeroOrOne( # pyright: ignore[reportAssignmentType]
|
||||
"w:ind", successors=_tag_seq[23:]
|
||||
)
|
||||
jc = ZeroOrOne("w:jc", successors=_tag_seq[27:])
|
||||
outlineLvl: CT_DecimalNumber = ZeroOrOne( # pyright: ignore[reportAssignmentType]
|
||||
"w:outlineLvl", successors=_tag_seq[31:]
|
||||
)
|
||||
sectPr = ZeroOrOne("w:sectPr", successors=_tag_seq[35:])
|
||||
del _tag_seq
|
||||
|
||||
@property
def first_line_indent(self) -> Length | None:
    """First-line indent derived from `w:ind/@w:hanging` and `w:ind/@w:firstLine`.

    A `w:hanging` value takes precedence and is returned negated, wrapped in a
    |Length|. Otherwise the `w:firstLine` value is returned as-is. |None| when there
    is no `w:ind` child or when neither attribute has a value.
    """
    ind = self.ind
    if ind is None:
        return None
    hanging = ind.hanging
    # -- a hanging indent is expressed as a negative first-line indent --
    return ind.firstLine if hanging is None else Length(-hanging)

@first_line_indent.setter
def first_line_indent(self, value: Length | None):
    # -- nothing to clear and nothing to set; avoid adding an empty `w:ind` --
    if self.ind is None and value is None:
        return
    ind = self.get_or_add_ind()
    # -- the two attributes are alternatives, so both are reset before one is set --
    ind.firstLine = None
    ind.hanging = None
    if value is None:
        return
    if value < 0:
        ind.hanging = -value
    else:
        ind.firstLine = value
|
||||
|
||||
@property
def ind_left(self) -> Length | None:
    """Left indent taken from `w:ind/@w:left`, |None| when there is no `w:ind` child."""
    ind = self.ind
    return None if ind is None else ind.left

@ind_left.setter
def ind_left(self, value: Length | None):
    # -- skip the write only when there is no value to set and no `w:ind` to clear --
    if value is not None or self.ind is not None:
        self.get_or_add_ind().left = value
|
||||
|
||||
@property
def ind_right(self) -> Length | None:
    """Right indent taken from `w:ind/@w:right`, |None| when there is no `w:ind` child."""
    ind = self.ind
    return None if ind is None else ind.right

@ind_right.setter
def ind_right(self, value: Length | None):
    # -- skip the write only when there is no value to set and no `w:ind` to clear --
    if value is not None or self.ind is not None:
        self.get_or_add_ind().right = value
|
||||
|
||||
@property
def jc_val(self) -> WD_ALIGN_PARAGRAPH | None:
    """The `val` of the `<w:jc>` child element, |None| when that child is absent."""
    jc = self.jc
    return None if jc is None else jc.val

@jc_val.setter
def jc_val(self, value):
    # -- assigning |None| removes the `w:jc` child rather than writing an attribute --
    if value is not None:
        self.get_or_add_jc().val = value
        return
    self._remove_jc()
|
||||
|
||||
@property
def keepLines_val(self):
    """The `val` of the `keepLines` child, |None| when that child is absent."""
    keepLines = self.keepLines
    return None if keepLines is None else keepLines.val

@keepLines_val.setter
def keepLines_val(self, value):
    # -- |None| removes the child element; any other value is written to its `val` --
    if value is not None:
        self.get_or_add_keepLines().val = value
        return
    self._remove_keepLines()
|
||||
|
||||
@property
def keepNext_val(self):
    """The `val` of the `keepNext` child, |None| when that child is absent."""
    keepNext = self.keepNext
    return None if keepNext is None else keepNext.val

@keepNext_val.setter
def keepNext_val(self, value):
    # -- |None| removes the child element; any other value is written to its `val` --
    if value is not None:
        self.get_or_add_keepNext().val = value
        return
    self._remove_keepNext()
|
||||
|
||||
@property
def pageBreakBefore_val(self):
    """The `val` of the `pageBreakBefore` child, |None| when that child is absent."""
    pageBreakBefore = self.pageBreakBefore
    return None if pageBreakBefore is None else pageBreakBefore.val

@pageBreakBefore_val.setter
def pageBreakBefore_val(self, value):
    # -- |None| removes the child element; any other value is written to its `val` --
    if value is not None:
        self.get_or_add_pageBreakBefore().val = value
        return
    self._remove_pageBreakBefore()
|
||||
|
||||
@property
def spacing_after(self):
    """Value of `w:spacing/@w:after`, |None| when there is no `w:spacing` child."""
    spacing = self.spacing
    return None if spacing is None else spacing.after

@spacing_after.setter
def spacing_after(self, value):
    # -- skip the write only when there is no value and no `w:spacing` to clear --
    if value is not None or self.spacing is not None:
        self.get_or_add_spacing().after = value
|
||||
|
||||
@property
def spacing_before(self):
    """Value of `w:spacing/@w:before`, |None| when there is no `w:spacing` child."""
    spacing = self.spacing
    return None if spacing is None else spacing.before

@spacing_before.setter
def spacing_before(self, value):
    # -- skip the write only when there is no value and no `w:spacing` to clear --
    if value is not None or self.spacing is not None:
        self.get_or_add_spacing().before = value
|
||||
|
||||
@property
def spacing_line(self):
    """Value of `w:spacing/@w:line`, |None| when there is no `w:spacing` child."""
    spacing = self.spacing
    return None if spacing is None else spacing.line

@spacing_line.setter
def spacing_line(self, value):
    # -- skip the write only when there is no value and no `w:spacing` to clear --
    if value is not None or self.spacing is not None:
        self.get_or_add_spacing().line = value
|
||||
|
||||
@property
def spacing_lineRule(self):
    """Value of `w:spacing/@w:lineRule` as a :ref:`WdLineSpacing` member.

    |None| when there is no `w:spacing` child. When `w:lineRule` is missing but
    `w:line` has a value, `WD_LINE_SPACING.MULTIPLE` is reported. No `SINGLE` or
    `DOUBLE` interpretation of `w:spacing/@w:line` is attempted here; a client that
    wants that distinction must derive it from the line value itself.
    """
    spacing = self.spacing
    if spacing is None:
        return None
    rule = spacing.lineRule
    # -- an explicit rule wins; without one, only a present `w:line` implies a rule --
    if rule is not None or spacing.line is None:
        return rule
    return WD_LINE_SPACING.MULTIPLE

@spacing_lineRule.setter
def spacing_lineRule(self, value):
    # -- skip the write only when there is no value and no `w:spacing` to clear --
    if value is not None or self.spacing is not None:
        self.get_or_add_spacing().lineRule = value
|
||||
|
||||
@property
def style(self) -> str | None:
    """The string in `./w:pStyle/@val`, |None| when there is no `w:pStyle` child."""
    pStyle = self.pStyle
    return None if pStyle is None else pStyle.val

@style.setter
def style(self, style: str | None):
    """Write `style` to `./w:pStyle/@val`, creating the `w:pStyle` child if needed.

    Assigning |None| removes the `w:pStyle` child instead.
    """
    if style is not None:
        self.get_or_add_pStyle().val = style
        return
    self._remove_pStyle()
|
||||
|
||||
@property
def widowControl_val(self):
    """The `val` of the `widowControl` child, |None| when that child is absent."""
    widowControl = self.widowControl
    return None if widowControl is None else widowControl.val

@widowControl_val.setter
def widowControl_val(self, value):
    # -- |None| removes the child element; any other value is written to its `val` --
    if value is not None:
        self.get_or_add_widowControl().val = value
        return
    self._remove_widowControl()
|
||||
|
||||
|
||||
class CT_Spacing(BaseOxmlElement):
    """`<w:spacing>` element, carrying the spacing attributes of a paragraph.

    These are the space before and after the paragraph and its line spacing.
    """

    # -- all four attributes are declared optional --
    after = OptionalAttribute("w:after", ST_TwipsMeasure)
    before = OptionalAttribute("w:before", ST_TwipsMeasure)
    line = OptionalAttribute("w:line", ST_SignedTwipsMeasure)
    lineRule = OptionalAttribute("w:lineRule", WD_LINE_SPACING)
|
||||
|
||||
|
||||
class CT_TabStop(BaseOxmlElement):
    """`<w:tab>` element, one tab stop with its alignment, leader, and position.

    The `w:tab` tag is also used for a tab character inside a run. This class serves
    that use too, for which only the `__str__()` method is needed.
    """

    val: WD_TAB_ALIGNMENT = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
        "w:val", WD_TAB_ALIGNMENT
    )
    # -- declared with `WD_TAB_LEADER.SPACES` as its default --
    leader: WD_TAB_LEADER | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:leader", WD_TAB_LEADER, default=WD_TAB_LEADER.SPACES
    )
    pos: Length = RequiredAttribute(  # pyright: ignore[reportAssignmentType]
        "w:pos", ST_SignedTwipsMeasure
    )

    def __str__(self) -> str:
        r"""Return "\t", the plain-text stand-in for a `w:tab` element found in a run.

        Having this method lets the text of run inner-content be collected the same
        way for every kind of inner-content element.
        """
        return "\t"
|
||||
|
||||
|
||||
class CT_TabStops(BaseOxmlElement):
    """`<w:tabs>` element, the container for a position-ordered sequence of tab stops."""

    tab = OneOrMore("w:tab", successors=())

    def insert_tab_in_order(self, pos, align, leader):
        """Return a new `w:tab` child placed according to its `pos`.

        The new element goes immediately before the first existing tab stop with a
        greater position, or at the end when there is no such tab stop.
        """
        new_tab = self._new_tab()
        new_tab.pos = pos
        new_tab.val = align
        new_tab.leader = leader

        # -- first existing tab stop positioned strictly after the new one, if any --
        follower = next((tab for tab in self.tab_lst if new_tab.pos < tab.pos), None)
        if follower is None:
            self.append(new_tab)
        else:
            follower.addprevious(new_tab)
        return new_tab
|
||||
307
path/to/venv/lib/python3.12/site-packages/docx/oxml/text/run.py
Normal file
307
path/to/venv/lib/python3.12/site-packages/docx/oxml/text/run.py
Normal file
@@ -0,0 +1,307 @@
|
||||
"""Custom element classes related to text runs (CT_R)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Callable, Iterator, List, cast
|
||||
|
||||
from docx.oxml.drawing import CT_Drawing
|
||||
from docx.oxml.ns import qn
|
||||
from docx.oxml.parser import OxmlElement
|
||||
from docx.oxml.simpletypes import ST_BrClear, ST_BrType
|
||||
from docx.oxml.text.font import CT_RPr
|
||||
from docx.oxml.xmlchemy import BaseOxmlElement, OptionalAttribute, ZeroOrMore, ZeroOrOne
|
||||
from docx.shared import TextAccumulator
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.shape import CT_Anchor, CT_Inline
|
||||
from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
|
||||
from docx.oxml.text.parfmt import CT_TabStop
|
||||
|
||||
# ------------------------------------------------------------------------------------
|
||||
# Run-level elements
|
||||
|
||||
|
||||
class CT_R(BaseOxmlElement):
    """`<w:r>` element, holding the run properties and inner content of a run."""

    add_br: Callable[[], CT_Br]
    add_tab: Callable[[], CT_TabStop]
    get_or_add_rPr: Callable[[], CT_RPr]
    _add_drawing: Callable[[], CT_Drawing]
    _add_t: Callable[..., CT_Text]

    rPr: CT_RPr | None = ZeroOrOne("w:rPr")  # pyright: ignore[reportAssignmentType]
    br = ZeroOrMore("w:br")
    cr = ZeroOrMore("w:cr")
    drawing = ZeroOrMore("w:drawing")
    t = ZeroOrMore("w:t")
    tab = ZeroOrMore("w:tab")

    def add_t(self, text: str) -> CT_Text:
        """Return a newly added `<w:t>` element containing `text`.

        The element gets `xml:space="preserve"` when `text` has leading or trailing
        whitespace.
        """
        t = self._add_t(text=text)
        if text != text.strip():
            t.set(qn("xml:space"), "preserve")
        return t

    def add_drawing(self, inline_or_anchor: CT_Inline | CT_Anchor) -> CT_Drawing:
        """Return a newly added `w:drawing` child that contains `inline_or_anchor`."""
        drawing = self._add_drawing()
        drawing.append(inline_or_anchor)
        return drawing

    def clear_content(self) -> None:
        """Remove every child element of this run other than `w:rPr`."""
        for child in self.xpath("./*[not(self::w:rPr)]"):
            self.remove(child)

    @property
    def inner_content_items(self) -> List[str | CT_Drawing | CT_LastRenderedPageBreak]:
        """Run content as text strings interleaved with drawing and page-break elements.

        Text-like elements are converted with `str()` and collected in a
        `TextAccumulator`, which is drained just before each `w:drawing` or
        `w:lastRenderedPageBreak` element and once more after the last element.
        """
        from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak

        items: List[str | CT_Drawing | CT_LastRenderedPageBreak] = []
        accum = TextAccumulator()
        content_elements = self.xpath(
            "w:br | w:cr | w:drawing | w:lastRenderedPageBreak | w:noBreakHyphen | w:ptab"
            " | w:t | w:tab"
        )
        for e in content_elements:
            if isinstance(e, (CT_Drawing, CT_LastRenderedPageBreak)):
                # -- emit the text gathered so far, then the element itself --
                items.extend(accum.pop())
                items.append(e)
            else:
                accum.push(str(e))
        # -- whatever text followed the last element emitted above --
        items.extend(accum.pop())
        return items

    def insert_comment_range_end_and_reference_below(self, comment_id: int) -> None:
        """Insert `w:commentRangeEnd` and a `w:commentReference` run after this run.

        The `w:commentRangeEnd` element ends up as the immediate sibling of this
        `w:r`, followed by a `w:r` containing the `w:commentReference` element.
        """
        reference_run = self._new_comment_reference_run(comment_id)
        range_end = OxmlElement("w:commentRangeEnd", attrs={qn("w:id"): str(comment_id)})
        # -- each `addnext()` inserts directly after this run, so the element added
        # -- second lands in front of the one added first --
        self.addnext(reference_run)
        self.addnext(range_end)

    def insert_comment_range_start_above(self, comment_id: int) -> None:
        """Insert a `w:commentRangeStart` element for `comment_id` before this run."""
        range_start = OxmlElement("w:commentRangeStart", attrs={qn("w:id"): str(comment_id)})
        self.addprevious(range_start)

    @property
    def lastRenderedPageBreaks(self) -> List[CT_LastRenderedPageBreak]:
        """All `w:lastRenderedPageBreak` child elements of this run."""
        return self.xpath("./w:lastRenderedPageBreak")

    @property
    def style(self) -> str | None:
        """The `style` value of the `w:rPr` child, |None| when there is no `w:rPr`."""
        rPr = self.rPr
        return None if rPr is None else rPr.style

    @style.setter
    def style(self, style: str | None):
        """Assign `style` to the `w:rPr` child of this run, adding `w:rPr` if absent."""
        self.get_or_add_rPr().style = style

    @property
    def text(self) -> str:
        """The textual content of this run.

        The `str()` values of the text-like inner-content children, such as `w:t`
        and `w:tab`, are concatenated in the order the XPath query returns them.
        """
        return "".join(
            map(str, self.xpath("w:br | w:cr | w:noBreakHyphen | w:ptab | w:t | w:tab"))
        )

    @text.setter
    def text(self, text: str):  # pyright: ignore[reportIncompatibleMethodOverride]
        # -- existing inner-content is replaced; a `w:rPr` child is kept --
        self.clear_content()
        _RunContentAppender.append_to_run_from_text(self, text)

    def _insert_rPr(self, rPr: CT_RPr) -> CT_RPr:
        # -- `w:rPr` is placed as the first child of the run --
        self.insert(0, rPr)
        return rPr

    def _new_comment_reference_run(self, comment_id: int) -> CT_R:
        """Return a new `w:r` element whose `w:commentReference` refers to `comment_id`.

        The result looks like this:

            <w:r>
              <w:rPr><w:rStyle w:val="CommentReference"/></w:rPr>
              <w:commentReference w:id="0"/>
            </w:r>

        """
        r = cast(CT_R, OxmlElement("w:r"))
        r.get_or_add_rPr().style = "CommentReference"
        reference = OxmlElement("w:commentReference", attrs={qn("w:id"): str(comment_id)})
        r.append(reference)
        return r
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------------
|
||||
# Run inner-content elements
|
||||
|
||||
|
||||
class CT_Br(BaseOxmlElement):
    """`<w:br>` element, marking a line, page, or column break in a run."""

    # -- declared with "textWrapping" as the default for `w:type` --
    type: str | None = OptionalAttribute(  # pyright: ignore[reportAssignmentType]
        "w:type", ST_BrType, default="textWrapping"
    )
    clear: str | None = OptionalAttribute("w:clear", ST_BrClear)  # pyright: ignore

    def __str__(self) -> str:
        r"""Text equivalent of this element, which depends on the break type.

        A break of type "textWrapping" (a line break) becomes "\n". Any other type,
        such as a page or column break, becomes the empty string.

        Having this method lets the text of run inner-content be collected the same
        way for every kind of inner-content element.
        """
        if self.type == "textWrapping":
            return "\n"
        return ""
|
||||
|
||||
|
||||
class CT_Cr(BaseOxmlElement):
    r"""`<w:cr>` element, representing a carriage-return (0x0D) character within a run.

    In Word this is a "soft" carriage-return: unlike pressing Enter (aka. Return) on
    the keyboard, it does not end the paragraph. Its text equivalent is taken to be a
    newline ("\n"), the closest plain-text counterpart in Python.

    NOTE: this complex-type name does not exist in the schema, where `w:cr` maps to
    `CT_Empty`. The name was added to give the element distinguished behavior, since
    `CT_Empty` is used for many elements.
    """

    def __str__(self) -> str:
        r"""Text equivalent of this element, a single newline ("\n")."""
        return "\n"
|
||||
|
||||
|
||||
class CT_NoBreakHyphen(BaseOxmlElement):
    """`<w:noBreakHyphen>` element, a hyphen at which a line may not be wrapped.

    Its plain-text equivalent is a dash ("-").

    NOTE: this complex-type name does not exist in the schema, where
    `w:noBreakHyphen` maps to `CT_Empty`. The name was added so this element can
    behave differently from the many other elements the schema represents with
    `CT_Empty`.
    """

    def __str__(self) -> str:
        """Text equivalent of this element, a single dash character ("-")."""
        return "-"
|
||||
|
||||
|
||||
class CT_PTab(BaseOxmlElement):
    """`<w:ptab>` element, an absolute-position tab character within a run.

    This character moves the rendering position to the specified position without
    regard to any tab stops, perhaps for laying out a table-of-contents (TOC) or
    similar.
    """

    def __str__(self) -> str:
        r"""Text equivalent of this element, a single tab ("\t") character.

        Having this method lets the text of run inner-content be collected the same
        way for every kind of inner-content element.
        """
        return "\t"
|
||||
|
||||
|
||||
# -- CT_Tab functionality is provided by CT_TabStop which also uses `w:tab` tag. That
|
||||
# -- element class provides the __str__() method for this empty element, unconditionally
|
||||
# -- returning "\t".
|
||||
|
||||
|
||||
class CT_Text(BaseOxmlElement):
    """`<w:t>` element, holding a sequence of characters within a run."""

    def __str__(self) -> str:
        """Text of this element, the empty string when it has no content.

        This lets a `w:t` element be asked for its text the same way as the other
        run-content elements. It never returns None, which is what reading `.text`
        on an element without content gives.
        """
        content = self.text
        return content if content else ""
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------------
|
||||
# Utility
|
||||
|
||||
|
||||
class _RunContentAppender:
    r"""Turns a Python string into inner-content elements appended to a `w:r` element.

    A contiguous stretch of regular characters is appended as a single `<w:t>`
    element. Each tab character ("\t") results in a call to `add_tab()` on the run,
    and each newline or carriage-return character ("\n", "\r") results in a call to
    `add_br()` on the run.
    """

    def __init__(self, r: CT_R):
        self._r = r
        # -- regular characters waiting to be written as one `<w:t>` element --
        self._bfr: List[str] = []

    @classmethod
    def append_to_run_from_text(cls, r: CT_R, text: str):
        """Append inner-content elements for `text` to the `r` element."""
        cls(r).add_text(text)

    def add_text(self, text: str):
        """Append inner-content elements for `text` to the `w:r` element."""
        for char in text:
            self.add_char(char)
        # -- write out any characters still buffered after the last one --
        self.flush()

    def add_char(self, char: str):
        """Process the next input character through a finite state machine (FSM).

        The two states, buffer pending and buffer not pending, are hidden behind
        `.flush()`. That method must be called at the end of the text so a pending
        `<w:t>` element is written.
        """
        if char != "\t" and char not in "\r\n":
            self._bfr.append(char)
            return
        # -- a tab or line-ending character closes the run of buffered text --
        self.flush()
        if char == "\t":
            self._r.add_tab()
        else:
            self._r.add_br()

    def flush(self):
        """Write buffered characters, if any, as one `<w:t>` element and empty the buffer."""
        pending = "".join(self._bfr)
        if pending:
            self._r.add_t(pending)
        self._bfr.clear()
|
||||
Reference in New Issue
Block a user