Initial commit (Clean history)

This commit is contained in:
anhduy-tech
2025-12-30 11:27:14 +07:00
commit ef48c93de0
19255 changed files with 3248867 additions and 0 deletions

View File

@@ -0,0 +1,428 @@
"""Font-related proxy objects."""
from __future__ import annotations
from typing import TYPE_CHECKING, Any
from docx.dml.color import ColorFormat
from docx.enum.text import WD_UNDERLINE
from docx.shared import ElementProxy, Emu
if TYPE_CHECKING:
from docx.enum.text import WD_COLOR_INDEX
from docx.oxml.text.run import CT_R
from docx.shared import Length
class Font(ElementProxy):
    """Proxy object for parent of a `<w:rPr>` element and providing access to
    character properties such as font name, font size, bold, and subscript.

    Most properties are tri-state: |True|, |False|, or |None|. The getters return
    |None| whenever the wrapped element has no `<w:rPr>` child; the setters create
    that child on demand via `get_or_add_rPr()`.
    """

    def __init__(self, r: CT_R, parent: Any | None = None) -> None:
        super().__init__(r, parent)
        self._element = r
        self._r = r

    @property
    def all_caps(self) -> bool | None:
        """Read/write.

        Causes text in this font to appear in capital letters.
        """
        return self._get_bool_prop("caps")

    @all_caps.setter
    def all_caps(self, value: bool | None) -> None:
        self._set_bool_prop("caps", value)

    @property
    def bold(self) -> bool | None:
        """Read/write.

        Causes text in this font to appear in bold.
        """
        return self._get_bool_prop("b")

    @bold.setter
    def bold(self, value: bool | None) -> None:
        self._set_bool_prop("b", value)

    @property
    def color(self) -> ColorFormat:
        """A |ColorFormat| object providing a way to get and set the text color for this
        font."""
        return ColorFormat(self._element)

    @property
    def complex_script(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, causes the characters in the run to be treated as complex script
        regardless of their Unicode values.
        """
        return self._get_bool_prop("cs")

    @complex_script.setter
    def complex_script(self, value: bool | None) -> None:
        self._set_bool_prop("cs", value)

    @property
    def cs_bold(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, causes the complex script characters in the run to be displayed in
        bold typeface.
        """
        return self._get_bool_prop("bCs")

    @cs_bold.setter
    def cs_bold(self, value: bool | None) -> None:
        self._set_bool_prop("bCs", value)

    @property
    def cs_italic(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, causes the complex script characters in the run to be displayed in
        italic typeface.
        """
        return self._get_bool_prop("iCs")

    @cs_italic.setter
    def cs_italic(self, value: bool | None) -> None:
        self._set_bool_prop("iCs", value)

    @property
    def double_strike(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, causes the text in the run to appear with double strikethrough.
        """
        return self._get_bool_prop("dstrike")

    @double_strike.setter
    def double_strike(self, value: bool | None) -> None:
        self._set_bool_prop("dstrike", value)

    @property
    def emboss(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, causes the text in the run to appear as if raised off the page in
        relief.
        """
        return self._get_bool_prop("emboss")

    @emboss.setter
    def emboss(self, value: bool | None) -> None:
        self._set_bool_prop("emboss", value)

    @property
    def hidden(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, causes the text in the run to be hidden from display, unless
        applications settings force hidden text to be shown.
        """
        return self._get_bool_prop("vanish")

    @hidden.setter
    def hidden(self, value: bool | None) -> None:
        self._set_bool_prop("vanish", value)

    @property
    def highlight_color(self) -> WD_COLOR_INDEX | None:
        """Color of highlighting applied or |None| if not highlighted."""
        rPr = self._element.rPr
        if rPr is None:
            return None
        return rPr.highlight_val

    @highlight_color.setter
    def highlight_color(self, value: WD_COLOR_INDEX | None) -> None:
        rPr = self._element.get_or_add_rPr()
        rPr.highlight_val = value

    @property
    def italic(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, causes the text of the run to appear in italics. |None| indicates
        the effective value is inherited from the style hierarchy.
        """
        return self._get_bool_prop("i")

    @italic.setter
    def italic(self, value: bool | None) -> None:
        self._set_bool_prop("i", value)

    @property
    def imprint(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, causes the text in the run to appear as if pressed into the page.
        """
        return self._get_bool_prop("imprint")

    @imprint.setter
    def imprint(self, value: bool | None) -> None:
        self._set_bool_prop("imprint", value)

    @property
    def math(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, specifies this run contains WML that should be handled as though it
        was Office Open XML Math.
        """
        return self._get_bool_prop("oMath")

    @math.setter
    def math(self, value: bool | None) -> None:
        self._set_bool_prop("oMath", value)

    @property
    def name(self) -> str | None:
        """The typeface name for this |Font|.

        Causes the text it controls to appear in the named font, if a matching font is
        found. |None| indicates the typeface is inherited from the style hierarchy.
        """
        rPr = self._element.rPr
        if rPr is None:
            return None
        return rPr.rFonts_ascii

    @name.setter
    def name(self, value: str | None) -> None:
        rPr = self._element.get_or_add_rPr()
        # -- the getter reads only `rFonts_ascii`; both attributes are written so the
        # -- two stay in agreement
        rPr.rFonts_ascii = value
        rPr.rFonts_hAnsi = value

    @property
    def no_proof(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, specifies that the contents of this run should not report any
        errors when the document is scanned for spelling and grammar.
        """
        return self._get_bool_prop("noProof")

    @no_proof.setter
    def no_proof(self, value: bool | None) -> None:
        self._set_bool_prop("noProof", value)

    @property
    def outline(self) -> bool | None:
        """Read/write tri-state value.

        When |True| causes the characters in the run to appear as if they have an
        outline, by drawing a one pixel wide border around the inside and outside
        borders of each character glyph.
        """
        return self._get_bool_prop("outline")

    @outline.setter
    def outline(self, value: bool | None) -> None:
        self._set_bool_prop("outline", value)

    @property
    def rtl(self) -> bool | None:
        """Read/write tri-state value.

        When |True| causes the text in the run to have right-to-left characteristics.
        """
        return self._get_bool_prop("rtl")

    @rtl.setter
    def rtl(self, value: bool | None) -> None:
        self._set_bool_prop("rtl", value)

    @property
    def shadow(self) -> bool | None:
        """Read/write tri-state value.

        When |True| causes the text in the run to appear as if each character has a
        shadow.
        """
        return self._get_bool_prop("shadow")

    @shadow.setter
    def shadow(self, value: bool | None) -> None:
        self._set_bool_prop("shadow", value)

    @property
    def size(self) -> Length | None:
        """Font height in English Metric Units (EMU).

        |None| indicates the font size should be inherited from the style hierarchy.
        |Length| is a subclass of |int| having properties for convenient conversion into
        points or other length units. The :class:`docx.shared.Pt` class allows
        convenient specification of point values::

            >>> font.size = Pt(24)
            >>> font.size
            304800
            >>> font.size.pt
            24.0
        """
        rPr = self._element.rPr
        if rPr is None:
            return None
        return rPr.sz_val

    @size.setter
    def size(self, emu: int | Length | None) -> None:
        rPr = self._element.get_or_add_rPr()
        rPr.sz_val = None if emu is None else Emu(emu)

    @property
    def small_caps(self) -> bool | None:
        """Read/write tri-state value.

        When |True| causes the lowercase characters in the run to appear as capital
        letters two points smaller than the font size specified for the run.
        """
        return self._get_bool_prop("smallCaps")

    @small_caps.setter
    def small_caps(self, value: bool | None) -> None:
        self._set_bool_prop("smallCaps", value)

    @property
    def snap_to_grid(self) -> bool | None:
        """Read/write tri-state value.

        When |True| causes the run to use the document grid characters per line settings
        defined in the docGrid element when laying out the characters in this run.
        """
        return self._get_bool_prop("snapToGrid")

    @snap_to_grid.setter
    def snap_to_grid(self, value: bool | None) -> None:
        self._set_bool_prop("snapToGrid", value)

    @property
    def spec_vanish(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, specifies that the given run shall always behave as if it is
        hidden, even when hidden text is being displayed in the current document. The
        property has a very narrow, specialized use related to the table of contents.
        Consult the spec (§17.3.2.36) for more details.
        """
        return self._get_bool_prop("specVanish")

    @spec_vanish.setter
    def spec_vanish(self, value: bool | None) -> None:
        self._set_bool_prop("specVanish", value)

    @property
    def strike(self) -> bool | None:
        """Read/write tri-state value.

        When |True| causes the text in the run to appear with a single horizontal line
        through the center of the line.
        """
        return self._get_bool_prop("strike")

    @strike.setter
    def strike(self, value: bool | None) -> None:
        self._set_bool_prop("strike", value)

    @property
    def subscript(self) -> bool | None:
        """Boolean indicating whether the characters in this |Font| appear as subscript.

        |None| indicates the subscript/superscript value is inherited from the style
        hierarchy.
        """
        rPr = self._element.rPr
        if rPr is None:
            return None
        return rPr.subscript

    @subscript.setter
    def subscript(self, value: bool | None) -> None:
        rPr = self._element.get_or_add_rPr()
        rPr.subscript = value

    @property
    def superscript(self) -> bool | None:
        """Boolean indicating whether the characters in this |Font| appear as
        superscript.

        |None| indicates the subscript/superscript value is inherited from the style
        hierarchy.
        """
        rPr = self._element.rPr
        if rPr is None:
            return None
        return rPr.superscript

    @superscript.setter
    def superscript(self, value: bool | None) -> None:
        rPr = self._element.get_or_add_rPr()
        rPr.superscript = value

    @property
    def underline(self) -> bool | WD_UNDERLINE | None:
        """The underline style for this |Font|.

        The value is one of |None|, |True|, |False|, or a member of :ref:`WdUnderline`.
        |None| indicates the font inherits its underline value from the style hierarchy.
        |False| indicates no underline. |True| indicates single underline. The values
        from :ref:`WdUnderline` are used to specify other underline styles such as
        double, wavy, and dotted.
        """
        rPr = self._element.rPr
        if rPr is None:
            return None
        val = rPr.u_val
        # -- collapse the three "simple" enum members to None/True/False; any other
        # -- value (including None) is passed through unchanged
        if val == WD_UNDERLINE.INHERITED:
            return None
        if val == WD_UNDERLINE.SINGLE:
            return True
        if val == WD_UNDERLINE.NONE:
            return False
        return val

    @underline.setter
    def underline(self, value: bool | WD_UNDERLINE | None) -> None:
        rPr = self._element.get_or_add_rPr()
        # -- works fine without these two mappings, but only because True == 1 and
        # -- False == 0, which happen to match the mapping for WD_UNDERLINE.SINGLE
        # -- and .NONE respectively.
        if value is True:
            val = WD_UNDERLINE.SINGLE
        elif value is False:
            val = WD_UNDERLINE.NONE
        else:
            val = value
        rPr.u_val = val

    @property
    def web_hidden(self) -> bool | None:
        """Read/write tri-state value.

        When |True|, specifies that the contents of this run shall be hidden when the
        document is displayed in web page view.
        """
        return self._get_bool_prop("webHidden")

    @web_hidden.setter
    def web_hidden(self, value: bool | None) -> None:
        self._set_bool_prop("webHidden", value)

    def _get_bool_prop(self, name: str) -> bool | None:
        """Return the value of boolean child of `w:rPr` having `name`.

        Returns |None| when the wrapped element has no `w:rPr` child.
        """
        rPr = self._element.rPr
        if rPr is None:
            return None
        return rPr._get_bool_val(name)  # pyright: ignore[reportPrivateUsage]

    def _set_bool_prop(self, name: str, value: bool | None) -> None:
        """Assign `value` to the boolean child `name` of `w:rPr`.

        The `w:rPr` element is added first when not already present.
        """
        rPr = self._element.get_or_add_rPr()
        rPr._set_bool_val(name, value)  # pyright: ignore[reportPrivateUsage]

View File

@@ -0,0 +1,121 @@
"""Hyperlink-related proxy objects for python-docx, Hyperlink in particular.
A hyperlink occurs in a paragraph, at the same level as a Run, and a hyperlink itself
contains runs, which is where the visible text of the hyperlink is stored. So it's kind
of in-between, less than a paragraph and more than a run. So it gets its own module.
"""
from __future__ import annotations
from typing import TYPE_CHECKING
from docx.shared import Parented
from docx.text.run import Run
if TYPE_CHECKING:
import docx.types as t
from docx.oxml.text.hyperlink import CT_Hyperlink
class Hyperlink(Parented):
    """Proxy object wrapping a `<w:hyperlink>` element.

    A hyperlink is a child of a paragraph, a peer of |Run|, and itself contains the
    runs that hold its visible text.
    """

    def __init__(self, hyperlink: CT_Hyperlink, parent: t.ProvidesStoryPart):
        super().__init__(parent)
        self._parent = parent
        self._hyperlink = hyperlink
        self._element = hyperlink

    @property
    def address(self) -> str:
        """The "URL" of the hyperlink (but not necessarily a web link).

        While commonly a web link like "https://google.com", the address can take a
        variety of forms, including "internal links" to bookmarked locations within
        the document. For an internal "jump", for example from a table-of-contents
        (TOC) entry to a heading, the address is blank ("") and the bookmark reference
        (like "_Toc147925734") is found in the `.fragment` property.
        """
        rel_id = self._hyperlink.rId
        if not rel_id:
            return ""
        return self._parent.part.rels[rel_id].target_ref

    @property
    def contains_page_break(self) -> bool:
        """True when the text of this hyperlink is broken across page boundaries.

        This is not uncommon and can happen, for example, when the hyperlink text is
        multiple words and occurs in the last line of a page. A hyperlink could in
        theory contain more than one page break, so read this value as "one-or-more
        rendered page-breaks are present".
        """
        page_breaks = self._hyperlink.lastRenderedPageBreaks
        return bool(page_breaks)

    @property
    def fragment(self) -> str:
        """Reference like `#glossary` at end of URL that refers to a sub-resource.

        The value does not include the fragment-separator character ("#"). It is known
        as a "named anchor" in an HTML context and "anchor" in the MS API; the RFC 3986
        name "URI fragment" is used here to avoid confusion with the HTML `<a>`
        element, which represents a full hyperlink.

        Fragments are also used to refer to bookmarks within the same document, in
        which case the `.address` value will be blank ("") and this property will hold
        a value like "_Toc147925734".

        To reliably get an entire web URL, concatenate this with the `.address` value,
        separated by "#" when both are present; the `.url` property does that.

        Word sometimes stores a fragment in this property (an XML attribute) and
        sometimes with the address, depending on how the URL is inserted, so don't
        depend on this field being empty to indicate no fragment is present.
        """
        anchor = self._hyperlink.anchor
        return anchor if anchor else ""

    @property
    def runs(self) -> list[Run]:
        """List of |Run| instances in this hyperlink.

        Together these define the visible text of the hyperlink. That text is
        typically contained in a single run but will be broken into multiple runs if,
        for example, part of the hyperlink is bold or the text was changed after the
        document was saved.
        """
        collected: list[Run] = []
        for r in self._hyperlink.r_lst:
            collected.append(Run(r, self._parent))
        return collected

    @property
    def text(self) -> str:
        """String formed by concatenating the text of each run in the hyperlink.

        Tabs and line breaks in the XML are mapped to ``\\t`` and ``\\n`` characters
        respectively. Note that rendered page-breaks can occur within a hyperlink but
        they are not reflected in this text.
        """
        hyperlink_text = self._hyperlink.text
        return hyperlink_text

    @property
    def url(self) -> str:
        """Convenience property to get web URLs from hyperlinks that contain them.

        This value is the empty string ("") when there is no address portion, so its
        boolean value can also be used to distinguish external URIs from internal
        "jump" hyperlinks like those found in a table-of-contents.

        The value may also be a link to a file, so callers wanting only web URLs need
        to check for a protocol prefix like `https://`.

        When both an address and a fragment are present, the two are joined with the
        fragment-separator hash ("#"). Otherwise this value is the same as that of the
        `.address` property.
        """
        address = self.address
        fragment = self.fragment
        if address and fragment:
            return f"{address}#{fragment}"
        return address if address else ""

View File

@@ -0,0 +1,104 @@
"""Proxy objects related to rendered page-breaks."""
from __future__ import annotations
from typing import TYPE_CHECKING
from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
from docx.shared import Parented
if TYPE_CHECKING:
import docx.types as t
from docx.text.paragraph import Paragraph
class RenderedPageBreak(Parented):
    """A page-break inserted by Word during page-layout for print or display purposes.

    This usually does not correspond to a "hard" page-break inserted by the document
    author; Word simply ran out of room on one page and started another. The position
    of these breaks can change with the printer, page-size, and margins. They also
    change in response to edits, but not until Word loads and saves the document.

    These are never inserted by `python-docx` because it has no rendering function.
    They are generally only useful for text-extraction from existing documents, when
    `python-docx` is being used solely as a document "reader".

    NOTE: a rendered page-break can occur within a hyperlink; consider a multi-word
    hyperlink like "excellent Wikipedia article on LLMs" that falls at the end of the
    last line on a page such that the page breaks between "Wikipedia" and "article".
    In such a "page-breaks-in-hyperlink" case, THESE METHODS WILL "MOVE" THE
    PAGE-BREAK to occur after the hyperlink, so the entire hyperlink appears in the
    paragraph returned by `.preceding_paragraph_fragment`. This places the "tail" text
    of the hyperlink on the "wrong" page, but avoids having two hyperlinks each with a
    fragment of the actual text and pointing to the same address.
    """

    def __init__(
        self,
        lastRenderedPageBreak: CT_LastRenderedPageBreak,
        parent: t.ProvidesStoryPart,
    ):
        super().__init__(parent)
        self._element = self._lastRenderedPageBreak = lastRenderedPageBreak

    @property
    def preceding_paragraph_fragment(self) -> Paragraph | None:
        """A "loose" paragraph containing the content preceding this page-break.

        Intended to be used together with `.following_paragraph_fragment`.

        This value is `None` when no content precedes this page-break, which is common
        and occurs whenever a page breaks on an even paragraph boundary. Returning
        `None` avoids "inserting" a non-existent paragraph into the content stream.
        Content can include DrawingML items like images or charts.

        The returned paragraph *is divorced from the document body*; changes made to it
        will not be reflected in the document. It provides a familiar container
        (`Paragraph`) to interrogate for the content preceding this page-break in the
        paragraph in which it occurred.

        Contains the entire hyperlink when this break occurs within a hyperlink.
        """
        lrpb = self._lastRenderedPageBreak
        if lrpb.precedes_all_content:
            return None

        # -- imported at call time rather than at module level --
        from docx.text.paragraph import Paragraph

        fragment_p = lrpb.preceding_fragment_p
        return Paragraph(fragment_p, self._parent)

    @property
    def following_paragraph_fragment(self) -> Paragraph | None:
        """A "loose" paragraph containing the content following this page-break.

        HAS POTENTIALLY SURPRISING BEHAVIORS so read carefully to be sure this is what
        you want. This is primarily targeted toward text-extraction use-cases for which
        precisely associating text with the page it occurs on is important.

        Intended to be used together with `.preceding_paragraph_fragment`.

        This value is `None` when no content follows this page-break. That is unlikely
        in practice because Word places even-paragraph-boundary page-breaks on the
        paragraph *following* the page-break, but it is possible and must be checked
        for. Returning `None` avoids "inserting" an extra, non-existent paragraph into
        the content stream. Content can include DrawingML items like images or charts,
        not just text.

        The returned paragraph *is divorced from the document body*; changes made to it
        will not be reflected in the document. It provides a container (`Paragraph`)
        with familiar properties and methods for characterizing the paragraph content
        following a mid-paragraph page-break.

        Contains no portion of the hyperlink when this break occurs within a hyperlink.
        """
        lrpb = self._lastRenderedPageBreak
        if lrpb.follows_all_content:
            return None

        # -- imported at call time rather than at module level --
        from docx.text.paragraph import Paragraph

        fragment_p = lrpb.following_fragment_p
        return Paragraph(fragment_p, self._parent)

View File

@@ -0,0 +1,173 @@
"""Paragraph-related proxy types."""
from __future__ import annotations
from typing import TYPE_CHECKING, Iterator, List, cast
from docx.enum.style import WD_STYLE_TYPE
from docx.oxml.text.run import CT_R
from docx.shared import StoryChild
from docx.styles.style import ParagraphStyle
from docx.text.hyperlink import Hyperlink
from docx.text.pagebreak import RenderedPageBreak
from docx.text.parfmt import ParagraphFormat
from docx.text.run import Run
if TYPE_CHECKING:
import docx.types as t
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.oxml.text.paragraph import CT_P
from docx.styles.style import CharacterStyle
class Paragraph(StoryChild):
    """Proxy object wrapping a `<w:p>` element."""

    def __init__(self, p: CT_P, parent: t.ProvidesStoryPart):
        super().__init__(parent)
        self._p = self._element = p

    def add_run(self, text: str | None = None, style: str | CharacterStyle | None = None) -> Run:
        """Append run containing `text` and having character-style `style`.

        `text` can contain tab (``\\t``) characters, which are converted to the
        appropriate XML form for a tab. `text` can also include newline (``\\n``) or
        carriage return (``\\r``) characters, each of which is converted to a line
        break. When `text` is `None`, the new run is empty.
        """
        r = self._p.add_r()
        run = Run(r, self)
        # -- falsy `text` ("" or None) and falsy `style` are both skipped --
        if text:
            run.text = text
        if style:
            run.style = style
        return run

    @property
    def alignment(self) -> WD_PARAGRAPH_ALIGNMENT | None:
        """A member of the :ref:`WdParagraphAlignment` enumeration specifying the
        justification setting for this paragraph.

        A value of |None| indicates the paragraph has no directly-applied alignment
        value and will inherit its alignment value from its style hierarchy. Assigning
        |None| to this property removes any directly-applied alignment value.
        """
        return self._p.alignment

    @alignment.setter
    def alignment(self, value: WD_PARAGRAPH_ALIGNMENT | None) -> None:
        self._p.alignment = value

    def clear(self) -> Paragraph:
        """Return this same paragraph after removing all its content.

        Paragraph-level formatting, such as style, is preserved.
        """
        self._p.clear_content()
        return self

    @property
    def contains_page_break(self) -> bool:
        """`True` when one or more rendered page-breaks occur in this paragraph."""
        return bool(self._p.lastRenderedPageBreaks)

    @property
    def hyperlinks(self) -> List[Hyperlink]:
        """A |Hyperlink| instance for each hyperlink in this paragraph."""
        return [Hyperlink(hyperlink, self) for hyperlink in self._p.hyperlink_lst]

    def insert_paragraph_before(
        self, text: str | None = None, style: str | ParagraphStyle | None = None
    ) -> Paragraph:
        """Return a newly created paragraph, inserted directly before this paragraph.

        If `text` is supplied, the new paragraph contains that text in a single run. If
        `style` is provided, that style is assigned to the new paragraph.
        """
        paragraph = self._insert_paragraph_before()
        if text:
            paragraph.add_run(text)
        if style is not None:
            paragraph.style = style
        return paragraph

    def iter_inner_content(self) -> Iterator[Run | Hyperlink]:
        """Generate the runs and hyperlinks in this paragraph, in the order they appear.

        The content in a paragraph consists of both runs and hyperlinks. This method
        allows accessing each of those separately, in document order, for when the
        precise position of the hyperlink within the paragraph text is important. Note
        that a hyperlink itself contains runs.
        """
        for r_or_hlink in self._p.inner_content_elements:
            # -- anything that is not a `CT_R` is wrapped as a hyperlink --
            if isinstance(r_or_hlink, CT_R):
                yield Run(r_or_hlink, self)
            else:
                yield Hyperlink(r_or_hlink, self)

    @property
    def paragraph_format(self) -> ParagraphFormat:
        """The |ParagraphFormat| object providing access to the formatting properties
        for this paragraph, such as line spacing and indentation."""
        return ParagraphFormat(self._element)

    @property
    def rendered_page_breaks(self) -> List[RenderedPageBreak]:
        """All rendered page-breaks in this paragraph.

        Most often an empty list, sometimes contains one page-break, but can contain
        more than one in rare or contrived cases.
        """
        return [RenderedPageBreak(lrpb, self) for lrpb in self._p.lastRenderedPageBreaks]

    @property
    def runs(self) -> List[Run]:
        """Sequence of |Run| instances corresponding to the <w:r> elements in this
        paragraph."""
        return [Run(r, self) for r in self._p.r_lst]

    @property
    def style(self) -> ParagraphStyle | None:
        """Read/Write.

        |_ParagraphStyle| object representing the style assigned to this paragraph. If
        no explicit style is assigned to this paragraph, its value is the default
        paragraph style for the document. A paragraph style name can be assigned in lieu
        of a paragraph style object. Assigning |None| removes any applied style, making
        its effective value the default paragraph style for the document.
        """
        style_id = self._p.style
        style = self.part.get_style(style_id, WD_STYLE_TYPE.PARAGRAPH)
        return cast(ParagraphStyle, style)

    @style.setter
    def style(self, style_or_name: str | ParagraphStyle | None) -> None:
        style_id = self.part.get_style_id(style_or_name, WD_STYLE_TYPE.PARAGRAPH)
        self._p.style = style_id

    @property
    def text(self) -> str:
        """The textual content of this paragraph.

        The text includes the visible-text portion of any hyperlinks in the paragraph.
        Tabs and line breaks in the XML are mapped to ``\\t`` and ``\\n`` characters
        respectively.

        Assigning text to this property causes all existing paragraph content to be
        replaced with a single run containing the assigned text. A ``\\t`` character in
        the text is mapped to a ``<w:tab/>`` element and each ``\\n`` or ``\\r``
        character is mapped to a line break. Paragraph-level formatting, such as style,
        is preserved. All run-level formatting, such as bold or italic, is removed.
        """
        return self._p.text

    @text.setter
    def text(self, text: str | None) -> None:
        # -- existing content is discarded first; a run is always appended, and it is
        # -- left empty when `text` is None or "" (see `add_run`)
        self.clear()
        self.add_run(text)

    def _insert_paragraph_before(self) -> Paragraph:
        """Return a newly created paragraph, inserted directly before this paragraph."""
        p = self._p.add_p_before()
        return Paragraph(p, self._parent)

View File

@@ -0,0 +1,286 @@
"""Paragraph-related proxy types."""
from docx.enum.text import WD_LINE_SPACING
from docx.shared import ElementProxy, Emu, Length, Pt, Twips, lazyproperty
from docx.text.tabstops import TabStops
class ParagraphFormat(ElementProxy):
"""Provides access to paragraph formatting such as justification, indentation, line
spacing, space before and after, and widow/orphan control."""
@property
def alignment(self):
    """A member of the :ref:`WdParagraphAlignment` enumeration specifying the
    justification setting for this paragraph.

    |None| indicates paragraph alignment is inherited from the style hierarchy; it
    is also what is returned when the element has no `pPr` child.
    """
    props = self._element.pPr
    return None if props is None else props.jc_val

@alignment.setter
def alignment(self, value):
    # -- the `pPr` child is created on demand before the value is stored --
    self._element.get_or_add_pPr().jc_val = value
@property
def first_line_indent(self):
    """|Length| value specifying the relative difference in indentation for the
    first line of the paragraph.

    A positive value indents the first line; a negative value produces a hanging
    indent. |None| indicates first line indentation is inherited from the style
    hierarchy; it is also what is returned when the element has no `pPr` child.
    """
    props = self._element.pPr
    return None if props is None else props.first_line_indent

@first_line_indent.setter
def first_line_indent(self, value):
    # -- the `pPr` child is created on demand before the value is stored --
    self._element.get_or_add_pPr().first_line_indent = value
@property
def keep_together(self):
    """|True| if the paragraph should be kept "in one piece" and not broken across a
    page boundary when the document is rendered.

    |None| indicates its effective value is inherited from the style hierarchy; it
    is also what is returned when the element has no `pPr` child.
    """
    props = self._element.pPr
    return None if props is None else props.keepLines_val

@keep_together.setter
def keep_together(self, value):
    # -- the `pPr` child is created on demand before the value is stored --
    props = self._element.get_or_add_pPr()
    props.keepLines_val = value
@property
def keep_with_next(self):
    """|True| if the paragraph should be kept on the same page as the subsequent
    paragraph when the document is rendered.

    This could be used, for example, to keep a section heading on the same page as
    its first paragraph. |None| indicates its effective value is inherited from the
    style hierarchy; it is also what is returned when the element has no `pPr`
    child.
    """
    props = self._element.pPr
    return None if props is None else props.keepNext_val

@keep_with_next.setter
def keep_with_next(self, value):
    # -- the `pPr` child is created on demand before the value is stored --
    props = self._element.get_or_add_pPr()
    props.keepNext_val = value
@property
def left_indent(self):
    """|Length| value specifying the space between the left margin and the left side
    of the paragraph.

    |None| indicates the left indent value is inherited from the style hierarchy;
    it is also what is returned when the element has no `pPr` child. An |Inches|
    value object is a convenient way to apply indentation in units of inches.
    """
    props = self._element.pPr
    return None if props is None else props.ind_left

@left_indent.setter
def left_indent(self, value):
    # -- the `pPr` child is created on demand before the value is stored --
    self._element.get_or_add_pPr().ind_left = value
@property
def line_spacing(self):
    """|float| or |Length| value specifying the space between baselines in
    successive lines of the paragraph.

    |None| indicates line spacing is inherited from the style hierarchy. A float
    value, e.g. ``2.0`` or ``1.75``, indicates spacing is applied in multiples of
    line heights. A |Length| value such as ``Pt(12)`` indicates spacing is a fixed
    height; the |Pt| value class is a convenient way to apply line spacing in units
    of points. Assigning |None| resets line spacing to inherit from the style
    hierarchy.
    """
    props = self._element.pPr
    if props is None:
        return None
    line, rule = props.spacing_line, props.spacing_lineRule
    return self._line_spacing(line, rule)

@line_spacing.setter
def line_spacing(self, value):
    props = self._element.get_or_add_pPr()

    # -- None clears both the spacing value and its rule --
    if value is None:
        props.spacing_line = None
        props.spacing_lineRule = None
        return

    # -- a Length is a fixed height; an existing AT_LEAST rule is left alone,
    # -- any other rule becomes EXACTLY
    if isinstance(value, Length):
        props.spacing_line = value
        if props.spacing_lineRule != WD_LINE_SPACING.AT_LEAST:
            props.spacing_lineRule = WD_LINE_SPACING.EXACTLY
        return

    # -- anything else is a multiple of line height, scaled by Twips(240) --
    props.spacing_line = Emu(value * Twips(240))
    props.spacing_lineRule = WD_LINE_SPACING.MULTIPLE
@property
def line_spacing_rule(self):
    """A member of the :ref:`WdLineSpacing` enumeration indicating how the value of
    :attr:`line_spacing` should be interpreted.

    Assigning any of the :ref:`WdLineSpacing` members :attr:`SINGLE`,
    :attr:`DOUBLE`, or :attr:`ONE_POINT_FIVE` will cause the value of
    :attr:`line_spacing` to be updated to produce the corresponding line spacing.
    """
    props = self._element.pPr
    if props is None:
        return None
    line, rule = props.spacing_line, props.spacing_lineRule
    return self._line_spacing_rule(line, rule)

@line_spacing_rule.setter
def line_spacing_rule(self, value):
    props = self._element.get_or_add_pPr()
    # -- the three preset rules are stored as a MULTIPLE rule plus a fixed number
    # -- of twips
    presets = (
        (WD_LINE_SPACING.SINGLE, 240),
        (WD_LINE_SPACING.ONE_POINT_FIVE, 360),
        (WD_LINE_SPACING.DOUBLE, 480),
    )
    for preset_rule, twips in presets:
        if value == preset_rule:
            props.spacing_line = Twips(twips)
            props.spacing_lineRule = WD_LINE_SPACING.MULTIPLE
            return
    # -- any other value is stored as the rule itself; spacing_line is untouched --
    props.spacing_lineRule = value
@property
def page_break_before(self):
    """|True| if the paragraph should appear at the top of the page following the
    prior paragraph.

    |None| indicates its effective value is inherited from the style hierarchy; it
    is also what is returned when the element has no `pPr` child.
    """
    props = self._element.pPr
    return None if props is None else props.pageBreakBefore_val

@page_break_before.setter
def page_break_before(self, value):
    # -- the `pPr` child is created on demand before the value is stored --
    props = self._element.get_or_add_pPr()
    props.pageBreakBefore_val = value
@property
def right_indent(self):
    """|Length| value specifying the space between the right margin and the right
    side of the paragraph.

    |None| indicates the right indent value is inherited from the style hierarchy;
    it is also what is returned when the element has no `pPr` child. A |Cm| value
    object is a convenient way to apply indentation in units of centimeters.
    """
    props = self._element.pPr
    return None if props is None else props.ind_right

@right_indent.setter
def right_indent(self, value):
    # -- the `pPr` child is created on demand before the value is stored --
    self._element.get_or_add_pPr().ind_right = value
@property
def space_after(self):
    """|Length| value giving the spacing that appears between this paragraph and the
    one that follows it.

    |None| means the value is inherited from the style hierarchy; it is also what is
    returned when the paragraph has no `w:pPr` element. |Length| objects offer
    convenience properties such as :attr:`~.Length.pt` and :attr:`~.Length.inches`
    for easy conversion to other length units.
    """
    pPr = self._element.pPr
    return None if pPr is None else pPr.spacing_after


@space_after.setter
def space_after(self, value):
    # -- `get_or_add_pPr()` supplies the `w:pPr` element the value is written to --
    pPr = self._element.get_or_add_pPr()
    pPr.spacing_after = value
@property
def space_before(self):
    """|Length| value giving the spacing that appears between this paragraph and the
    one that precedes it.

    |None| means the value is inherited from the style hierarchy; it is also what is
    returned when the paragraph has no `w:pPr` element. |Length| objects offer
    convenience properties such as :attr:`~.Length.pt` and :attr:`~.Length.cm` for
    easy conversion to other length units.
    """
    pPr = self._element.pPr
    return None if pPr is None else pPr.spacing_before


@space_before.setter
def space_before(self, value):
    # -- `get_or_add_pPr()` supplies the `w:pPr` element the value is written to --
    pPr = self._element.get_or_add_pPr()
    pPr.spacing_before = value
@lazyproperty
def tab_stops(self):
    """|TabStops| object giving access to the tab stops defined for this paragraph
    format."""
    # -- |TabStops| wraps the `w:pPr` element returned by `get_or_add_pPr()` --
    return TabStops(self._element.get_or_add_pPr())
@property
def widow_control(self):
    """|True| when the first and last lines of the paragraph stay on the same page as
    the rest of the paragraph when Word repaginates the document.

    |None| means its effective value is inherited from the style hierarchy; it is
    also what is returned when the paragraph has no `w:pPr` element.
    """
    pPr = self._element.pPr
    return None if pPr is None else pPr.widowControl_val


@widow_control.setter
def widow_control(self, value):
    # -- `get_or_add_pPr()` supplies the `w:pPr` element the value is written to --
    pPr = self._element.get_or_add_pPr()
    pPr.widowControl_val = value
@staticmethod
def _line_spacing(spacing_line, spacing_lineRule):
    """Return the line spacing value derived from `spacing_line` and
    `spacing_lineRule`.

    |None| is returned when `spacing_line` is |None|. When `spacing_lineRule` is
    ``WD_LINE_SPACING.MULTIPLE`` the result is `spacing_line` divided by ``Pt(12)``,
    a |float| number of lines. For any other rule, `spacing_line` itself (a |Length|
    of absolute line height) is returned unchanged.
    """
    if spacing_line is None:
        return None
    is_multiple = spacing_lineRule == WD_LINE_SPACING.MULTIPLE
    return spacing_line / Pt(12) if is_multiple else spacing_line
@staticmethod
def _line_spacing_rule(line, lineRule):
    """Return the line spacing rule derived from `line` and `lineRule`.

    When `lineRule` is ``WD_LINE_SPACING.MULTIPLE`` and `line` is exactly 240, 360,
    or 480 twips, the special :ref:`WdLineSpacing` member for single, 1.5-line, or
    double spacing respectively is returned. In every other case `lineRule` is
    returned unchanged.
    """
    if lineRule == WD_LINE_SPACING.MULTIPLE:
        named_spacings = (
            (240, WD_LINE_SPACING.SINGLE),
            (360, WD_LINE_SPACING.ONE_POINT_FIVE),
            (480, WD_LINE_SPACING.DOUBLE),
        )
        for twips, member in named_spacings:
            if line == Twips(twips):
                return member
    return lineRule

View File

@@ -0,0 +1,257 @@
"""Run-related proxy objects for python-docx, Run in particular."""
from __future__ import annotations
from typing import IO, TYPE_CHECKING, Iterator, cast
from docx.drawing import Drawing
from docx.enum.style import WD_STYLE_TYPE
from docx.enum.text import WD_BREAK
from docx.oxml.drawing import CT_Drawing
from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
from docx.shape import InlineShape
from docx.shared import StoryChild
from docx.styles.style import CharacterStyle
from docx.text.font import Font
from docx.text.pagebreak import RenderedPageBreak
if TYPE_CHECKING:
import docx.types as t
from docx.enum.text import WD_UNDERLINE
from docx.oxml.text.run import CT_R, CT_Text
from docx.shared import Length
class Run(StoryChild):
    """Proxy object wrapping a `<w:r>` element.

    Many |Run| properties are tri-state, taking |True|, |False|, or |None|. |True| and
    |False| mean on and off respectively. |None| means the property is not specified
    directly on the run, so its effective value comes from the style hierarchy.
    """

    def __init__(self, r: CT_R, parent: t.ProvidesStoryPart):
        super().__init__(parent)
        # -- the same `w:r` element is reachable under three attribute names --
        self._r = r
        self._element = r
        self.element = r

    def add_break(self, break_type: WD_BREAK = WD_BREAK.LINE):
        """Add a break element of `break_type` to this run.

        `break_type` is a member of `WD_BREAK` (imported from `docx.enum.text`), for
        example `WD_BREAK.LINE`, `WD_BREAK.PAGE`, or `WD_BREAK.COLUMN`. It defaults to
        `WD_BREAK.LINE`. A member that is not one of the six handled here raises
        |KeyError| before any element is added.
        """
        # -- (w:br/@w:type, w:br/@w:clear) for each supported break-type --
        br_attrs_by_break_type = {
            WD_BREAK.LINE: (None, None),
            WD_BREAK.PAGE: ("page", None),
            WD_BREAK.COLUMN: ("column", None),
            WD_BREAK.LINE_CLEAR_LEFT: ("textWrapping", "left"),
            WD_BREAK.LINE_CLEAR_RIGHT: ("textWrapping", "right"),
            WD_BREAK.LINE_CLEAR_ALL: ("textWrapping", "all"),
        }
        br_type, br_clear = br_attrs_by_break_type[break_type]

        new_br = self._r.add_br()
        # -- an attribute is only assigned when it has a value --
        if br_type is not None:
            new_br.type = br_type
        if br_clear is not None:
            new_br.clear = br_clear

    def add_picture(
        self,
        image_path_or_stream: str | IO[bytes],
        width: int | Length | None = None,
        height: int | Length | None = None,
    ) -> InlineShape:
        """Return |InlineShape| containing the image identified by `image_path_or_stream`.

        The picture is added to the end of this run.

        `image_path_or_stream` can be a path (a string) or a file-like object containing
        a binary image.

        When neither `width` nor `height` is given, the picture appears at its native
        size. When only one is given, it determines a scaling factor that is applied to
        the other dimension as well, preserving the aspect ratio of the image. The
        native size of the picture is calculated from the dots-per-inch (dpi) value in
        the image file, defaulting to 72 dpi when no value is specified, as is often
        the case.
        """
        new_inline = self.part.new_pic_inline(image_path_or_stream, width, height)
        self._r.add_drawing(new_inline)
        return InlineShape(new_inline)

    def add_tab(self) -> None:
        """Add a ``<w:tab/>`` element at the end of the run, which Word interprets as a
        tab character."""
        self._r.add_tab()

    def add_text(self, text: str):
        """Return a newly appended |_Text| object (corresponding to a new ``<w:t>``
        child element of the run) containing `text`.

        Compare with the possibly more friendly approach of assigning text to the
        :attr:`Run.text` property.
        """
        return _Text(self._r.add_t(text))

    @property
    def bold(self) -> bool | None:
        """Read/write tri-state value.

        |True| causes the text of the run to appear in bold face. |False| causes the
        text to appear non-bold unconditionally. |None| means the bold setting for this
        run is inherited from the style hierarchy.
        """
        return self.font.bold

    @bold.setter
    def bold(self, value: bool | None):
        self.font.bold = value

    def clear(self):
        """Remove all content from this run and return a reference to the run.

        All run formatting is preserved.
        """
        self._r.clear_content()
        return self

    @property
    def contains_page_break(self) -> bool:
        """`True` when one or more rendered page-breaks occur in this run.

        "Hard" page-breaks inserted by the author are not included. A hard page-break
        gives rise to a rendered page-break in the right position, so including it
        would "double-count" that page-break.

        More than one rendered page-break in a single run would be very rare, but it is
        possible.
        """
        rendered_page_breaks = self._r.lastRenderedPageBreaks
        return bool(rendered_page_breaks)

    @property
    def font(self) -> Font:
        """The |Font| object providing access to the character formatting properties of
        this run, such as font name and size."""
        return Font(self._element)

    @property
    def italic(self) -> bool | None:
        """Read/write tri-state value.

        |True| causes the text of the run to appear in italics. |False| causes the text
        to appear non-italic unconditionally. |None| means the italic setting for this
        run is inherited from the style hierarchy.
        """
        return self.font.italic

    @italic.setter
    def italic(self, value: bool | None):
        self.font.italic = value

    def iter_inner_content(self) -> Iterator[str | Drawing | RenderedPageBreak]:
        """Generate the content-items in this run in the order they appear.

        NOTE: only content-types currently supported by `python-docx` are generated. In
        this version, that is text and rendered page-breaks. Drawing is included but
        currently only provides access to its XML element (CT_Drawing) on its
        `._drawing` attribute. `Drawing` attributes and methods may be expanded in
        future releases.

        A number of element-types can appear inside a run, but most of those (w:br,
        w:cr, w:noBreakHyphen, w:t, w:tab) have a clear plain-text equivalent. Any
        contiguous range of such elements is generated as a single `str`. Rendered
        page-break and drawing elements are generated individually. Any other elements
        are ignored.
        """
        for content_item in self._r.inner_content_items:
            # -- text passes through unchanged --
            if isinstance(content_item, str):
                yield content_item
                continue
            # -- element items are wrapped in their proxy type, with this run as parent --
            if isinstance(content_item, CT_LastRenderedPageBreak):
                yield RenderedPageBreak(content_item, self)
                continue
            if isinstance(content_item, CT_Drawing):  # pyright: ignore[reportUnnecessaryIsInstance]
                yield Drawing(content_item, self)

    def mark_comment_range(self, last_run: Run, comment_id: int) -> None:
        """Mark the range of runs from this run to `last_run` (inclusive) as belonging to a comment.

        `comment_id` identifies the comment that references this range.
        """
        first_r, last_r = self._r, last_run._r
        # -- `w:commentRangeStart` with `comment_id` goes before this (first) run --
        first_r.insert_comment_range_start_above(comment_id)
        # -- `w:commentRangeEnd` and the `w:commentReference` run with `comment_id` go
        # -- after `last_run`
        last_r.insert_comment_range_end_and_reference_below(comment_id)

    @property
    def style(self) -> CharacterStyle:
        """Read/write.

        A |CharacterStyle| object representing the character style applied to this run.
        The default character style for the document (often `Default Character Font`) is
        returned when the run has no directly-applied character style. Assigning |None|
        to this property removes any directly-applied character style.
        """
        return cast(
            CharacterStyle, self.part.get_style(self._r.style, WD_STYLE_TYPE.CHARACTER)
        )

    @style.setter
    def style(self, style_or_name: str | CharacterStyle | None):
        # -- the part resolves a style object, style name, or |None| to a style-id --
        self._r.style = self.part.get_style_id(style_or_name, WD_STYLE_TYPE.CHARACTER)

    @property
    def text(self) -> str:
        """String formed by concatenating the text equivalent of the content of this run.

        Each `<w:t>` element adds the text characters it contains. A `<w:tab/>` element
        adds a `\\t` character. A `<w:cr/>` or `<w:br>` element each add a `\\n`
        character. Note that a `<w:br>` element can indicate a page break or column
        break as well as a line break. Only line-break `<w:br>` elements translate to
        a `\\n` character. Others are ignored. All other content child elements, such as
        `<w:drawing>`, are ignored.

        Assigning text to this property has the reverse effect, translating each `\\t`
        character to a `<w:tab/>` element and each `\\n` or `\\r` character to a
        `<w:cr/>` element. Any existing run content is replaced. Run formatting is
        preserved.
        """
        return self._r.text

    @text.setter
    def text(self, text: str):
        self._r.text = text

    @property
    def underline(self) -> bool | WD_UNDERLINE | None:
        """The underline style for this |Run|.

        Value is one of |None|, |True|, |False|, or a member of :ref:`WdUnderline`.

        |None| indicates the run has no directly-applied underline value and so inherits
        the underline value of its containing paragraph. Assigning |None| to this
        property removes any directly-applied underline value.

        |False| indicates a directly-applied setting of no underline, overriding any
        inherited value.

        |True| indicates single underline.

        The members of :ref:`WdUnderline` specify other underline styles such as double,
        wavy, and dotted.
        """
        return self.font.underline

    @underline.setter
    def underline(self, value: bool | WD_UNDERLINE | None):
        self.font.underline = value
class _Text:
    """Proxy object wrapping a `<w:t>` element."""

    def __init__(self, t_elm: CT_Text):
        super().__init__()
        # -- the wrapped `w:t` element --
        self._t = t_elm

View File

@@ -0,0 +1,123 @@
"""Tabstop-related proxy types."""
from docx.enum.text import WD_TAB_ALIGNMENT, WD_TAB_LEADER
from docx.shared import ElementProxy
class TabStops(ElementProxy):
    """A sequence of |TabStop| objects providing access to the tab stops of a paragraph
    or paragraph style.

    Supports iteration, indexed access, del, and len(). It is accessed using the
    :attr:`~.ParagraphFormat.tab_stops` property of ParagraphFormat; it is not intended
    to be constructed directly.
    """

    def __init__(self, element):
        super(TabStops, self).__init__(element, None)
        # -- the `w:pPr` element whose `w:tabs` child holds the tab stops --
        self._pPr = element

    def __delitem__(self, idx):
        """Remove the tab stop at offset `idx` in this sequence.

        Raises |IndexError| when there are no tab stops or `idx` is out of range.
        """
        tabs = self._pPr.tabs
        try:
            # -- Index `tab_lst`, like `__getitem__()` does, rather than the raw
            # -- `w:tabs` element. lxml counts comment and processing-instruction
            # -- nodes as children, so indexing the element could select a different
            # -- node than `tab_stops[idx]` returns.
            # -- `tabs` is |None| when no tab stops are defined; the attribute lookup
            # -- then raises AttributeError, which is reported as an IndexError.
            tabs.remove(tabs.tab_lst[idx])
        except (AttributeError, IndexError):
            # -- `from None` keeps the internal lookup failure out of the traceback --
            raise IndexError("tab index out of range") from None

        # -- drop `w:tabs` once its last `w:tab` is gone; counted the same way
        # -- `__len__()` counts --
        if not tabs.tab_lst:
            self._pPr.remove(tabs)

    def __getitem__(self, idx):
        """Enables list-style access by index.

        Raises |IndexError| when there are no tab stops or `idx` is out of range.
        """
        tabs = self._pPr.tabs
        if tabs is None:
            raise IndexError("TabStops object is empty")
        tab = tabs.tab_lst[idx]
        return TabStop(tab)

    def __iter__(self):
        """Generate a TabStop object for each of the w:tab elements, in XML document
        order."""
        tabs = self._pPr.tabs
        if tabs is not None:
            for tab in tabs.tab_lst:
                yield TabStop(tab)

    def __len__(self):
        """Number of tab stops in this sequence, 0 when there is no `w:tabs` element."""
        tabs = self._pPr.tabs
        if tabs is None:
            return 0
        return len(tabs.tab_lst)

    def add_tab_stop(self, position, alignment=WD_TAB_ALIGNMENT.LEFT, leader=WD_TAB_LEADER.SPACES):
        """Add a new tab stop at `position`, a |Length| object specifying the location
        of the tab stop relative to the paragraph edge.

        A negative `position` value is valid and appears in hanging indentation. Tab
        alignment defaults to left, but may be specified by passing a member of the
        :ref:`WdTabAlignment` enumeration as `alignment`. An optional leader character
        can be specified by passing a member of the :ref:`WdTabLeader` enumeration as
        `leader`.

        Returns the |TabStop| object wrapping the newly added `w:tab` element.
        """
        tabs = self._pPr.get_or_add_tabs()
        tab = tabs.insert_tab_in_order(position, alignment, leader)
        return TabStop(tab)

    def clear_all(self):
        """Remove all custom tab stops."""
        self._pPr._remove_tabs()
class TabStop(ElementProxy):
    """A single tab stop that applies to a paragraph or style.

    Obtained using list semantics on its containing |TabStops| object.
    """

    def __init__(self, element):
        super().__init__(element, None)
        # -- the wrapped `w:tab` element --
        self._tab = element

    @property
    def alignment(self):
        """A member of :ref:`WdTabAlignment` specifying the alignment setting of this
        tab stop.

        Read/write.
        """
        return self._tab.val

    @alignment.setter
    def alignment(self, value):
        self._tab.val = value

    @property
    def leader(self):
        """A member of :ref:`WdTabLeader` specifying the repeating character used as a
        "leader" to fill the space spanned by this tab.

        Assigning |None| produces the same result as assigning `WD_TAB_LEADER.SPACES`.

        Read/write.
        """
        return self._tab.leader

    @leader.setter
    def leader(self, value):
        self._tab.leader = value

    @property
    def position(self):
        """A |Length| object giving the distance of this tab stop from the inside edge
        of the paragraph.

        May be positive or negative. Read/write.
        """
        return self._tab.pos

    @position.setter
    def position(self, value):
        # -- A position change is done by inserting a replacement `w:tab` at `value`
        # -- via `insert_tab_in_order()`, carrying over alignment and leader, and then
        # -- removing the original element. This proxy is re-pointed at the replacement.
        old_tab = self._tab
        parent_tabs = old_tab.getparent()
        self._tab = parent_tabs.insert_tab_in_order(value, old_tab.val, old_tab.leader)
        parent_tabs.remove(old_tab)