Initial commit (Clean history)
This commit is contained in:
65
path/to/venv/lib/python3.12/site-packages/docx/__init__.py
Normal file
65
path/to/venv/lib/python3.12/site-packages/docx/__init__.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""Initialize `docx` package.
|
||||
|
||||
Export the `Document` constructor function and establish the mapping of part-type to
|
||||
the part-class that implements that type.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Type
|
||||
|
||||
from docx.api import Document
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.opc.part import Part
|
||||
|
||||
__version__ = "1.2.0"
|
||||
|
||||
|
||||
__all__ = ["Document"]
|
||||
|
||||
|
||||
# -- register custom Part classes with opc package reader --
|
||||
|
||||
from docx.opc.constants import CONTENT_TYPE as CT
|
||||
from docx.opc.constants import RELATIONSHIP_TYPE as RT
|
||||
from docx.opc.part import PartFactory
|
||||
from docx.opc.parts.coreprops import CorePropertiesPart
|
||||
from docx.parts.comments import CommentsPart
|
||||
from docx.parts.document import DocumentPart
|
||||
from docx.parts.hdrftr import FooterPart, HeaderPart
|
||||
from docx.parts.image import ImagePart
|
||||
from docx.parts.numbering import NumberingPart
|
||||
from docx.parts.settings import SettingsPart
|
||||
from docx.parts.styles import StylesPart
|
||||
|
||||
|
||||
def part_class_selector(content_type: str, reltype: str) -> Type[Part] | None:
    """Return the custom part class to use for a relationship of type `reltype`.

    |ImagePart| is selected when `reltype` is the image relationship type; |None| is
    returned for every other relationship type. `content_type` is accepted as part of
    the selector signature but is not consulted here.
    """
    return ImagePart if reltype == RT.IMAGE else None
|
||||
|
||||
|
||||
# -- install the relationship-type based selector defined above --
PartFactory.part_class_selector = part_class_selector

# -- map each content-type to the custom part class registered for it --
for _content_type, _part_cls in (
    (CT.OPC_CORE_PROPERTIES, CorePropertiesPart),
    (CT.WML_COMMENTS, CommentsPart),
    (CT.WML_DOCUMENT_MAIN, DocumentPart),
    (CT.WML_FOOTER, FooterPart),
    (CT.WML_HEADER, HeaderPart),
    (CT.WML_NUMBERING, NumberingPart),
    (CT.WML_SETTINGS, SettingsPart),
    (CT.WML_STYLES, StylesPart),
):
    PartFactory.part_type_for[_content_type] = _part_cls

# -- remove registration-only names from the package namespace. `RT` and `ImagePart` are
# -- deliberately kept because `part_class_selector()` looks them up when it is called.
del (
    CT,
    CorePropertiesPart,
    CommentsPart,
    DocumentPart,
    FooterPart,
    HeaderPart,
    NumberingPart,
    PartFactory,
    SettingsPart,
    StylesPart,
    part_class_selector,
    _content_type,
    _part_cls,
)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
37
path/to/venv/lib/python3.12/site-packages/docx/api.py
Normal file
37
path/to/venv/lib/python3.12/site-packages/docx/api.py
Normal file
@@ -0,0 +1,37 @@
|
||||
"""Directly exposed API functions and classes, :func:`Document` for now.
|
||||
|
||||
Provides a syntactically more convenient API for interacting with the OpcPackage graph.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import IO, TYPE_CHECKING, cast
|
||||
|
||||
from docx.opc.constants import CONTENT_TYPE as CT
|
||||
from docx.package import Package
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.document import Document as DocumentObject
|
||||
from docx.parts.document import DocumentPart
|
||||
|
||||
|
||||
def Document(docx: str | IO[bytes] | None = None) -> DocumentObject:
    """Return a |Document| object loaded from `docx`.

    `docx` can be either a path to a ``.docx`` file (a string) or a file-like object.
    When `docx` is omitted or ``None``, the built-in default document "template" is
    loaded instead.

    Raises |ValueError| when the main document part of the opened package does not have
    the WordprocessingML main-document content type.
    """
    if docx is None:
        docx = _default_docx_path()

    package = Package.open(docx)
    document_part = cast("DocumentPart", package.main_document_part)

    # -- reject packages whose main part is not a Word document --
    if document_part.content_type != CT.WML_DOCUMENT_MAIN:
        tmpl = "file '%s' is not a Word file, content type is '%s'"
        raise ValueError(tmpl % (docx, document_part.content_type))

    return document_part.document
|
||||
|
||||
|
||||
def _default_docx_path():
|
||||
"""Return the path to the built-in default .docx package."""
|
||||
_thisdir = os.path.split(__file__)[0]
|
||||
return os.path.join(_thisdir, "templates", "default.docx")
|
||||
101
path/to/venv/lib/python3.12/site-packages/docx/blkcntnr.py
Normal file
101
path/to/venv/lib/python3.12/site-packages/docx/blkcntnr.py
Normal file
@@ -0,0 +1,101 @@
|
||||
# pyright: reportImportCycles=false
|
||||
|
||||
"""Block item container, used by body, cell, header, etc.
|
||||
|
||||
Block level items are things like paragraph and table, although there are a few other
|
||||
specialized ones like structured document tags.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Iterator
|
||||
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
from docx.oxml.table import CT_Tbl
|
||||
from docx.oxml.text.paragraph import CT_P
|
||||
from docx.shared import StoryChild
|
||||
from docx.text.paragraph import Paragraph
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import docx.types as t
|
||||
from docx.oxml.comments import CT_Comment
|
||||
from docx.oxml.document import CT_Body
|
||||
from docx.oxml.section import CT_HdrFtr
|
||||
from docx.oxml.table import CT_Tc
|
||||
from docx.shared import Length
|
||||
from docx.styles.style import ParagraphStyle
|
||||
from docx.table import Table
|
||||
|
||||
BlockItemElement: TypeAlias = "CT_Body | CT_Comment | CT_HdrFtr | CT_Tc"
|
||||
|
||||
|
||||
class BlockItemContainer(StoryChild):
    """Base class for proxy objects that can contain block items.

    Containers of this kind include _Body, _Cell, header, footer, footnote, endnote,
    comment, and text box objects. The shared behavior for adding a block item such as
    a paragraph or table lives here.
    """

    def __init__(self, element: BlockItemElement, parent: t.ProvidesStoryPart):
        super().__init__(parent)
        self._element = element

    def add_paragraph(self, text: str = "", style: str | ParagraphStyle | None = None) -> Paragraph:
        """Return a paragraph newly added to the end of the content in this container.

        When `text` is non-empty it is placed in a single run in the new paragraph. When
        `style` is not |None| it is assigned as the paragraph style.

        If `style` is |None|, no paragraph style is applied, which has the same effect
        as applying the 'Normal' style.
        """
        new_paragraph = self._add_paragraph()
        if text:
            new_paragraph.add_run(text)
        if style is not None:
            new_paragraph.style = style
        return new_paragraph

    def add_table(self, rows: int, cols: int, width: Length) -> Table:
        """Return a table of `width` having `rows` rows and `cols` columns.

        The table is appended at the end of the content in this container.

        `width` is evenly distributed between the table columns.
        """
        from docx.table import Table

        new_tbl = CT_Tbl.new_tbl(rows, cols, width)
        self._element._insert_tbl(new_tbl)  # pyright: ignore[reportPrivateUsage]
        return Table(new_tbl, self)

    def iter_inner_content(self) -> Iterator[Paragraph | Table]:
        """Generate each `Paragraph` or `Table` in this container in document order."""
        from docx.table import Table

        for child in self._element.inner_content_elements:
            # -- a `CT_P` child becomes a paragraph, any other child a table --
            if isinstance(child, CT_P):
                yield Paragraph(child, self)
            else:
                yield Table(child, self)

    @property
    def paragraphs(self):
        """A list containing the paragraphs in this container, in document order.

        Read-only.
        """
        p_elms = self._element.p_lst
        return [Paragraph(p_elm, self) for p_elm in p_elms]

    @property
    def tables(self):
        """A list containing the tables in this container, in document order.

        Read-only.
        """
        from docx.table import Table

        tbl_elms = self._element.tbl_lst
        return [Table(tbl_elm, self) for tbl_elm in tbl_elms]

    def _add_paragraph(self):
        """Return a paragraph newly added to the end of the content in this container."""
        new_p = self._element.add_p()
        return Paragraph(new_p, self)
|
||||
163
path/to/venv/lib/python3.12/site-packages/docx/comments.py
Normal file
163
path/to/venv/lib/python3.12/site-packages/docx/comments.py
Normal file
@@ -0,0 +1,163 @@
|
||||
"""Collection providing access to comments added to this document."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
from typing import TYPE_CHECKING, Iterator
|
||||
|
||||
from docx.blkcntnr import BlockItemContainer
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.comments import CT_Comment, CT_Comments
|
||||
from docx.parts.comments import CommentsPart
|
||||
from docx.styles.style import ParagraphStyle
|
||||
from docx.text.paragraph import Paragraph
|
||||
|
||||
|
||||
class Comments:
    """Collection containing the comments added to this document."""

    def __init__(self, comments_elm: CT_Comments, comments_part: CommentsPart):
        self._comments_elm = comments_elm
        self._comments_part = comments_part

    def __iter__(self) -> Iterator[Comment]:
        """Iterator over the comments in this collection."""
        return (
            Comment(comment_elm, self._comments_part)
            for comment_elm in self._comments_elm.comment_lst
        )

    def __len__(self) -> int:
        """The number of comments in this collection."""
        return len(self._comments_elm.comment_lst)

    def add_comment(
        self, text: str | None = "", author: str = "", initials: str | None = ""
    ) -> Comment:
        """Add a new comment to the document and return it.

        The comment is added to the end of the comments collection and is assigned a unique
        comment-id.

        If `text` is provided, it is added to the comment. This option provides for the common
        case where a comment contains a modest passage of plain text. Multiple paragraphs can be
        added using the `text` argument by separating their text with newlines (`"\\n"`).
        Between newlines, text is interpreted as it is in `Document.add_paragraph(text=...)`.

        When `text` is the empty string or |None|, the comment is returned as created, without
        any text being added. The default is to place a single empty paragraph in the comment,
        which is the same behavior as the Word UI when you add a comment. New runs can be added
        to the first paragraph in the empty comment with `comment.paragraphs[0].add_run()` to
        add more complex text with emphasis or images. Additional paragraphs can be added using
        `.add_paragraph()`.

        `author` is a required attribute, set to the empty string by default.

        `initials` is an optional attribute, set to the empty string by default. Passing |None|
        for the `initials` parameter causes that attribute to be omitted from the XML.
        """
        comment_elm = self._comments_elm.add_comment()
        comment_elm.author = author
        comment_elm.initials = initials
        comment_elm.date = dt.datetime.now(dt.timezone.utc)
        comment = Comment(comment_elm, self._comments_part)

        # -- `None` is treated like "" so callers that forward an optional `text` value
        # -- don't fail on `None.split()` below
        if not text:
            return comment

        first_para_text, *other_para_texts = text.split("\n")

        # -- the first line goes into the paragraph already present in the new comment --
        comment.paragraphs[0].add_run(first_para_text)

        # -- each remaining line gets a paragraph of its own --
        for para_text in other_para_texts:
            comment.add_paragraph(text=para_text)

        return comment

    def get(self, comment_id: int) -> Comment | None:
        """Return the comment identified by `comment_id`, or |None| if not found."""
        comment_elm = self._comments_elm.get_comment_by_id(comment_id)
        return Comment(comment_elm, self._comments_part) if comment_elm is not None else None
|
||||
|
||||
|
||||
class Comment(BlockItemContainer):
    """Proxy for a single comment in the document.

    Gives access to comment metadata such as author, initials, and date.

    A comment is also a block-item container, similar to a table cell, so it can contain both
    paragraphs and tables and its paragraphs can contain rich text, hyperlinks and images,
    although the common case is that a comment contains a single paragraph of plain text like a
    sentence or phrase.

    Note that certain content like tables may not be displayed in the Word comment sidebar due to
    space limitations. Such "over-sized" content can still be viewed in the review pane.
    """

    def __init__(self, comment_elm: CT_Comment, comments_part: CommentsPart):
        super().__init__(comment_elm, comments_part)
        self._comment_elm = comment_elm

    def add_paragraph(self, text: str = "", style: str | ParagraphStyle | None = None) -> Paragraph:
        """Return a paragraph newly added to the end of the content in this comment.

        The paragraph has `text` in a single run if present, and is given paragraph style `style`.
        When `style` is |None| or omitted, the "CommentText" paragraph style is applied, which is
        the default style for comments.
        """
        new_paragraph = super().add_paragraph(text, style)

        if style is not None:
            return new_paragraph

        # -- have to assign style directly to element because `paragraph.style` raises when
        # -- a style is not present in the styles part
        new_paragraph._p.style = "CommentText"  # pyright: ignore[reportPrivateUsage]
        return new_paragraph

    @property
    def author(self) -> str:
        """Read/write. The recorded author of this comment.

        This field is required but can be set to the empty string.
        """
        return self._comment_elm.author

    @author.setter
    def author(self, value: str):
        self._comment_elm.author = value

    @property
    def comment_id(self) -> int:
        """The unique identifier of this comment."""
        return self._comment_elm.id

    @property
    def initials(self) -> str | None:
        """Read/write. The recorded initials of the comment author.

        This attribute is optional in the XML, returns |None| if not set. Assigning |None| removes
        any existing initials from the XML.
        """
        return self._comment_elm.initials

    @initials.setter
    def initials(self, value: str | None):
        self._comment_elm.initials = value

    @property
    def text(self) -> str:
        """The text content of this comment as a string.

        Only content in paragraphs is included and of course all emphasis and styling is stripped.

        Paragraph boundaries are indicated with a newline (`"\\n"`).
        """
        paragraph_texts = (paragraph.text for paragraph in self.paragraphs)
        return "\n".join(paragraph_texts)

    @property
    def timestamp(self) -> dt.datetime | None:
        """The date and time this comment was authored.

        This attribute is optional in the XML, returns |None| if not set.
        """
        return self._comment_elm.date
|
||||
Binary file not shown.
Binary file not shown.
112
path/to/venv/lib/python3.12/site-packages/docx/dml/color.py
Normal file
112
path/to/venv/lib/python3.12/site-packages/docx/dml/color.py
Normal file
@@ -0,0 +1,112 @@
|
||||
"""DrawingML objects related to color, ColorFormat being the most prominent."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, cast
|
||||
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
from docx.enum.dml import MSO_COLOR_TYPE
|
||||
from docx.oxml.simpletypes import ST_HexColorAuto
|
||||
from docx.shared import ElementProxy, RGBColor
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.enum.dml import MSO_THEME_COLOR
|
||||
from docx.oxml.text.font import CT_Color
|
||||
from docx.oxml.text.run import CT_R
|
||||
|
||||
# -- other element types can be a parent of an `w:rPr` element, but for now only `w:r` is --
|
||||
RPrParent: TypeAlias = "CT_R"
|
||||
|
||||
|
||||
class ColorFormat(ElementProxy):
    """Provides access to color settings like RGB color, theme color, and luminance adjustments."""

    def __init__(self, rPr_parent: RPrParent):
        super().__init__(rPr_parent)
        self._element = rPr_parent

    @property
    def rgb(self) -> RGBColor | None:
        """An |RGBColor| value or |None| if no RGB color is specified.

        When :attr:`type` is `MSO_COLOR_TYPE.RGB`, the value of this property will always be an
        |RGBColor| value. It may also be an |RGBColor| value if :attr:`type` is
        `MSO_COLOR_TYPE.THEME`, as Word writes the current value of a theme color when one is
        assigned. In that case, the RGB value should be interpreted as no more than a good guess
        however, as the theme color takes precedence at rendering time. Its value is |None|
        whenever :attr:`type` is either |None| or `MSO_COLOR_TYPE.AUTO`.

        Assigning an |RGBColor| value causes :attr:`type` to become `MSO_COLOR_TYPE.RGB` and any
        theme color is removed. Assigning |None| causes any color to be removed such that the
        effective color is inherited from the style hierarchy.
        """
        color_elm = self._color
        # -- neither a missing `w:color` element nor an "auto" value is an RGB color --
        if color_elm is None or color_elm.val == ST_HexColorAuto.AUTO:
            return None
        return cast(RGBColor, color_elm.val)

    @rgb.setter
    def rgb(self, value: RGBColor | None):
        if value is None:
            # -- nothing to remove, so avoid adding an empty `w:rPr` element --
            if self._color is None:
                return
            self._element.get_or_add_rPr()._remove_color()  # pyright: ignore[reportPrivateUsage]
            return

        # -- any existing color element is replaced by a fresh one carrying only `value` --
        run_properties = self._element.get_or_add_rPr()
        run_properties._remove_color()  # pyright: ignore[reportPrivateUsage]
        run_properties.get_or_add_color().val = value

    @property
    def theme_color(self) -> MSO_THEME_COLOR | None:
        """Member of :ref:`MsoThemeColorIndex` or |None| if no theme color is specified.

        When :attr:`type` is `MSO_COLOR_TYPE.THEME`, the value of this property will always be a
        member of :ref:`MsoThemeColorIndex`. When :attr:`type` has any other value, the value of
        this property is |None|.

        Assigning a member of :ref:`MsoThemeColorIndex` causes :attr:`type` to become
        `MSO_COLOR_TYPE.THEME`. Any existing RGB value is retained but ignored by Word. Assigning
        |None| causes any color specification to be removed such that the effective color is
        inherited from the style hierarchy.
        """
        color_elm = self._color
        return None if color_elm is None else color_elm.themeColor

    @theme_color.setter
    def theme_color(self, value: MSO_THEME_COLOR | None):
        if value is not None:
            self._element.get_or_add_rPr().get_or_add_color().themeColor = value
            return

        # -- assigning |None| removes the color element, but only when one is present --
        if self._color is not None and self._element.rPr is not None:
            self._element.rPr._remove_color()  # pyright: ignore[reportPrivateUsage]

    @property
    def type(self) -> MSO_COLOR_TYPE | None:
        """Read-only.

        A member of :ref:`MsoColorType`, one of RGB, THEME, or AUTO, corresponding to the way this
        color is defined. Its value is |None| if no color is applied at this level, which causes
        the effective color to be inherited from the style hierarchy.
        """
        color_elm = self._color
        if color_elm is None:
            return None
        # -- a theme color takes precedence over whatever `val` holds --
        if color_elm.themeColor is not None:
            return MSO_COLOR_TYPE.THEME
        return (
            MSO_COLOR_TYPE.AUTO if color_elm.val == ST_HexColorAuto.AUTO else MSO_COLOR_TYPE.RGB
        )

    @property
    def _color(self) -> CT_Color | None:
        """Return `w:rPr/w:color` or |None| if not present.

        Helper to factor out repetitive element access.
        """
        run_properties = self._element.rPr
        return None if run_properties is None else run_properties.color
|
||||
265
path/to/venv/lib/python3.12/site-packages/docx/document.py
Normal file
265
path/to/venv/lib/python3.12/site-packages/docx/document.py
Normal file
@@ -0,0 +1,265 @@
|
||||
# pyright: reportImportCycles=false
|
||||
# pyright: reportPrivateUsage=false
|
||||
|
||||
"""|Document| and closely related objects."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, TYPE_CHECKING, Iterator, List, Sequence
|
||||
|
||||
from docx.blkcntnr import BlockItemContainer
|
||||
from docx.enum.section import WD_SECTION
|
||||
from docx.enum.text import WD_BREAK
|
||||
from docx.section import Section, Sections
|
||||
from docx.shared import ElementProxy, Emu, Inches, Length
|
||||
from docx.text.run import Run
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import docx.types as t
|
||||
from docx.comments import Comment, Comments
|
||||
from docx.oxml.document import CT_Body, CT_Document
|
||||
from docx.parts.document import DocumentPart
|
||||
from docx.settings import Settings
|
||||
from docx.styles.style import ParagraphStyle, _TableStyle
|
||||
from docx.table import Table
|
||||
from docx.text.paragraph import Paragraph
|
||||
|
||||
|
||||
class Document(ElementProxy):
    """WordprocessingML (WML) document.

    Not intended to be constructed directly. Use :func:`docx.Document` to open or create
    a document.
    """

    def __init__(self, element: CT_Document, part: DocumentPart):
        super(Document, self).__init__(element)
        self._element = element
        self._part = part
        # -- lazily created by the `_body` property --
        self.__body: _Body | None = None

    def add_comment(
        self,
        runs: Run | Sequence[Run],
        text: str | None = "",
        author: str = "",
        initials: str | None = "",
    ) -> Comment:
        """Add a comment to the document, anchored to the specified runs.

        `runs` can be a single `Run` object or a non-empty sequence of `Run` objects. Only the
        first and last run of a sequence are used, it's just more convenient to pass a whole
        sequence when that's what you have handy, like `paragraph.runs` for example. When `runs`
        contains a single `Run` object, that run serves as both the first and last run.

        A comment can be anchored only on an even run boundary, meaning the text the comment
        "references" must be a non-zero integer number of consecutive runs. The runs need not be
        _contiguous_ per se, like the first can be in one paragraph and the last in the next
        paragraph, but all runs between the first and the last will be included in the reference.

        The comment reference range is delimited by placing a `w:commentRangeStart` element before
        the first run and a `w:commentRangeEnd` element after the last run. This is why only the
        first and last run are required and why a single run can serve as both first and last.
        Word works out which text to highlight in the UI based on these range markers.

        `text` allows the contents of a simple comment to be provided in the call, providing for
        the common case where a comment is a single phrase or sentence without special formatting
        such as bold or italics. Passing |None| is the same as passing the empty string; no text
        is added to the comment. More complex comments can be added using the returned `Comment`
        object in much the same way as a `Document` or (table) `Cell` object, using methods like
        `.add_paragraph()`, `.add_run()`, etc.

        The `author` and `initials` parameters allow that metadata to be set for the comment.
        `author` is a required attribute on a comment and is the empty string by default.
        `initials` is optional on a comment and may be omitted by passing |None|, but Word adds an
        `initials` attribute by default and we follow that convention by using the empty string
        when no `initials` argument is provided.
        """
        # -- normalize `runs` to a sequence of runs --
        runs = [runs] if isinstance(runs, Run) else runs
        first_run = runs[0]
        last_run = runs[-1]

        # -- `text` may be |None| per the signature; the comments collection splits `text`
        # -- on newlines, so hand it a string in every case --
        comment_text = "" if text is None else text

        # -- Note that comments can only appear in the document part --
        comment = self.comments.add_comment(text=comment_text, author=author, initials=initials)

        # -- let the first run orchestrate placement of the comment range start and end --
        first_run.mark_comment_range(last_run, comment.comment_id)

        return comment

    def add_heading(self, text: str = "", level: int = 1) -> Paragraph:
        """Return a heading paragraph newly added to the end of the document.

        The heading paragraph will contain `text` and have its paragraph style
        determined by `level`. If `level` is 0, the style is set to `Title`. If `level`
        is 1 (or omitted), `Heading 1` is used. Otherwise the style is set to `Heading
        {level}`. Raises |ValueError| if `level` is outside the range 0-9.
        """
        if not 0 <= level <= 9:
            raise ValueError("level must be in range 0-9, got %d" % level)
        style = "Title" if level == 0 else "Heading %d" % level
        return self.add_paragraph(text, style)

    def add_page_break(self) -> Paragraph:
        """Return a newly added |Paragraph| object containing only a page break."""
        paragraph = self.add_paragraph()
        paragraph.add_run().add_break(WD_BREAK.PAGE)
        return paragraph

    def add_paragraph(self, text: str = "", style: str | ParagraphStyle | None = None) -> Paragraph:
        """Return paragraph newly added to the end of the document.

        The paragraph is populated with `text` and having paragraph style `style`.

        `text` can contain tab (``\\t``) characters, which are converted to the
        appropriate XML form for a tab. `text` can also include newline (``\\n``) or
        carriage return (``\\r``) characters, each of which is converted to a line
        break.
        """
        return self._body.add_paragraph(text, style)

    def add_picture(
        self,
        image_path_or_stream: str | IO[bytes],
        width: int | Length | None = None,
        height: int | Length | None = None,
    ):
        """Return new picture shape added in its own paragraph at end of the document.

        The picture contains the image at `image_path_or_stream`, scaled based on
        `width` and `height`. If neither width nor height is specified, the picture
        appears at its native size. If only one is specified, it is used to compute a
        scaling factor that is then applied to the unspecified dimension, preserving the
        aspect ratio of the image. The native size of the picture is calculated using
        the dots-per-inch (dpi) value specified in the image file, defaulting to 72 dpi
        if no value is specified, as is often the case.
        """
        run = self.add_paragraph().add_run()
        return run.add_picture(image_path_or_stream, width, height)

    def add_section(self, start_type: WD_SECTION = WD_SECTION.NEW_PAGE) -> Section:
        """Return a |Section| object newly added at the end of the document.

        The optional `start_type` argument must be a member of the :ref:`WdSectionStart`
        enumeration, and defaults to ``WD_SECTION.NEW_PAGE`` if not provided.
        """
        new_sectPr = self._element.body.add_section_break()
        new_sectPr.start_type = start_type
        return Section(new_sectPr, self._part)

    def add_table(self, rows: int, cols: int, style: str | _TableStyle | None = None) -> Table:
        """Add a table having row and column counts of `rows` and `cols` respectively.

        `style` may be a table style object or a table style name. If `style` is |None|,
        the table inherits the default table style of the document.
        """
        # -- the table spans the width between the margins of the last section --
        table = self._body.add_table(rows, cols, self._block_width)
        table.style = style
        return table

    @property
    def comments(self) -> Comments:
        """A |Comments| object providing access to comments added to the document."""
        return self._part.comments

    @property
    def core_properties(self):
        """A |CoreProperties| object providing Dublin Core properties of document."""
        return self._part.core_properties

    @property
    def inline_shapes(self):
        """The |InlineShapes| collection for this document.

        An inline shape is a graphical object, such as a picture, contained in a run of
        text and behaving like a character glyph, being flowed like other text in a
        paragraph.
        """
        return self._part.inline_shapes

    def iter_inner_content(self) -> Iterator[Paragraph | Table]:
        """Generate each `Paragraph` or `Table` in this document in document order."""
        return self._body.iter_inner_content()

    @property
    def paragraphs(self) -> List[Paragraph]:
        """The |Paragraph| instances in the document, in document order.

        Note that paragraphs within revision marks such as ``<w:ins>`` or ``<w:del>`` do
        not appear in this list.
        """
        return self._body.paragraphs

    @property
    def part(self) -> DocumentPart:
        """The |DocumentPart| object of this document."""
        return self._part

    def save(self, path_or_stream: str | IO[bytes]) -> None:
        """Save this document to `path_or_stream`.

        `path_or_stream` can be either a path to a filesystem location (a string) or a
        file-like object.
        """
        self._part.save(path_or_stream)

    @property
    def sections(self) -> Sections:
        """|Sections| object providing access to each section in this document."""
        return Sections(self._element, self._part)

    @property
    def settings(self) -> Settings:
        """A |Settings| object providing access to the document-level settings."""
        return self._part.settings

    @property
    def styles(self):
        """A |Styles| object providing access to the styles in this document."""
        return self._part.styles

    @property
    def tables(self) -> List[Table]:
        """All |Table| instances in the document, in document order.

        Note that only tables appearing at the top level of the document appear in this
        list; a table nested inside a table cell does not appear. A table within
        revision marks such as ``<w:ins>`` or ``<w:del>`` will also not appear in the
        list.
        """
        return self._body.tables

    @property
    def _block_width(self) -> Length:
        """A |Length| object specifying the space between margins in last section."""
        section = self.sections[-1]
        # -- fall back to 8.5" page width and 1" margins when the section omits a value --
        page_width = section.page_width or Inches(8.5)
        left_margin = section.left_margin or Inches(1)
        right_margin = section.right_margin or Inches(1)
        return Emu(page_width - left_margin - right_margin)

    @property
    def _body(self) -> _Body:
        """The |_Body| instance containing the content for this document."""
        if self.__body is None:
            self.__body = _Body(self._element.body, self)
        return self.__body
|
||||
|
||||
|
||||
class _Body(BlockItemContainer):
    """Proxy for `<w:body>` element in this document.

    Its primary role is a container for document content.
    """

    def __init__(self, body_elm: CT_Body, parent: t.ProvidesStoryPart):
        super().__init__(body_elm, parent)
        self._body = body_elm

    def clear_content(self) -> _Body:
        """Return this |_Body| instance after clearing it of all content.

        Section properties for the main document story, if present, are preserved.
        """
        body_elm = self._body
        body_elm.clear_content()
        return self
|
||||
@@ -0,0 +1,59 @@
|
||||
"""DrawingML-related objects are in this subpackage."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from docx.oxml.drawing import CT_Drawing
|
||||
from docx.shared import Parented
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import docx.types as t
|
||||
from docx.image.image import Image
|
||||
|
||||
|
||||
class Drawing(Parented):
    """Container for a DrawingML object."""

    def __init__(self, drawing: CT_Drawing, parent: t.ProvidesStoryPart):
        super().__init__(parent)
        self._parent = parent
        self._drawing = self._element = drawing

    @property
    def has_picture(self) -> bool:
        """True when `drawing` contains an embedded picture.

        A drawing can contain a picture, but it can also contain a chart, SmartArt, or a
        drawing canvas. Methods related to a picture, like `.image`, will raise when the drawing
        does not contain a picture. Use this value to determine whether image methods will succeed.

        This value is `False` when only a linked picture is present. This should be relatively
        rare and the image would only be retrievable from the filesystem.

        Note this does not distinguish between inline and floating images. The presence of either
        one will cause this value to be `True`.
        """
        # -- A picture only qualifies when its blip carries an `r:embed` relationship id,
        # -- which is the attribute `.image` reads. A link-only picture has a `pic:pic`
        # -- element but no embedded image part, so matching on `pic:pic` alone would
        # -- report True and `.image` would then raise.
        embedded_pic = "a:graphic/a:graphicData/pic:pic[pic:blipFill/a:blip/@r:embed]"
        xpath_expr = (
            # -- an inline picture --
            f"./wp:inline/{embedded_pic}"
            # -- a floating picture --
            f" | ./wp:anchor/{embedded_pic}"
        )
        # -- xpath() will return a list, empty if there are no matches --
        return bool(self._drawing.xpath(xpath_expr))

    @property
    def image(self) -> Image:
        """An `Image` proxy object for the image in this (picture) drawing.

        Raises `ValueError` when this drawing contains something other than an embedded
        picture. Use `.has_picture` to qualify drawing objects before using this property.
        """
        picture_rIds = self._drawing.xpath(".//pic:blipFill/a:blip/@r:embed")
        if not picture_rIds:
            raise ValueError("drawing does not contain a picture")
        # -- the first embedded blip identifies the image part by relationship id --
        rId = picture_rIds[0]
        doc_part = self.part
        image_part = doc_part.related_parts[rId]
        return image_part.image
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
150
path/to/venv/lib/python3.12/site-packages/docx/enum/base.py
Normal file
150
path/to/venv/lib/python3.12/site-packages/docx/enum/base.py
Normal file
@@ -0,0 +1,150 @@
|
||||
"""Base classes and other objects used by enumerations."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import enum
|
||||
import textwrap
|
||||
from typing import TYPE_CHECKING, Any, Dict, Type, TypeVar
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing_extensions import Self
|
||||
|
||||
_T = TypeVar("_T", bound="BaseXmlEnum")
|
||||
|
||||
|
||||
class BaseEnum(int, enum.Enum):
    """Base class for Enums that do not map XML attr values.

    Each member is declared as an `(ms_api_value, docstr)` pair. The member's value is the
    integer assigned to the corresponding member in the MS API enum of the same name, and
    the whitespace-stripped `docstr` becomes the member's `__doc__`.
    """

    def __new__(cls, ms_api_value: int, docstr: str):
        member = int.__new__(cls, ms_api_value)
        member._value_ = ms_api_value
        member.__doc__ = docstr.strip()
        return member

    def __str__(self):
        """The symbolic name and integer value of this member, e.g. 'MIDDLE (3)'."""
        return "%s (%s)" % (self.name, self.value)
|
||||
|
||||
|
||||
class BaseXmlEnum(int, enum.Enum):
    """Base class for Enums that also map XML attr values.

    Each member is declared as an `(ms_api_value, xml_value, docstr)` triple. The member's
    value is the integer assigned to the corresponding member in the MS API enum of the
    same name; `xml_value` is stored on the member and the whitespace-stripped `docstr`
    becomes the member's `__doc__`.
    """

    xml_value: str | None

    def __new__(cls, ms_api_value: int, xml_value: str | None, docstr: str):
        member = int.__new__(cls, ms_api_value)
        member._value_ = ms_api_value
        member.xml_value = xml_value
        member.__doc__ = docstr.strip()
        return member

    def __str__(self):
        """The symbolic name and integer value of this member, e.g. 'MIDDLE (3)'."""
        return "%s (%s)" % (self.name, self.value)

    @classmethod
    def from_xml(cls, xml_value: str | None) -> Self:
        """Enumeration member corresponding to XML attribute value `xml_value`.

        The first member (in definition order) whose `xml_value` equals the argument is
        returned. Raises `ValueError` when no member matches.

        Example::

            >>> WD_PARAGRAPH_ALIGNMENT.from_xml("center")
            WD_PARAGRAPH_ALIGNMENT.CENTER

        """
        for candidate in cls:
            if candidate.xml_value == xml_value:
                return candidate
        raise ValueError(f"{cls.__name__} has no XML mapping for '{xml_value}'")

    @classmethod
    def to_xml(cls: Type[_T], value: int | _T | None) -> str | None:
        """XML value of this enum member, generally an XML attribute value.

        `value` can be a member or its integer value. Raises `ValueError` when `value` does
        not identify a member or when that member's `xml_value` is empty or |None|.
        """
        # -- presence of multi-arg `__new__()` method fools type-checker, but getting a
        # -- member by its value using EnumCls(val) works as usual.
        member = cls(value)
        mapped = member.xml_value
        if mapped:
            return mapped
        raise ValueError(f"{cls.__name__}.{member.name} has no XML representation")
|
||||
|
||||
|
||||
class DocsPageFormatter:
    """Generate an .rst doc page for an enumeration.

    Formats a RestructuredText documentation page (string) from the enumeration class name
    and class dict passed to the constructor. An immutable one-shot service object.
    """

    def __init__(self, clsname: str, clsdict: Dict[str, Any]):
        self._clsname, self._clsdict = clsname, clsdict

    @property
    def page_str(self):
        """The RestructuredText documentation page for the enumeration.

        The page is a link target built from the MS API name, the page title, the intro
        text, a `----` transition and the member definitions, each separated by a blank
        line. This is the only API member for the class.
        """
        sections = (
            f".. _{self._ms_name!s}:",
            self._page_title,
            self._intro_text,
            "----",
            self._member_defs,
        )
        return "\n\n".join(sections)

    @property
    def _intro_text(self):
        """Docstring of the enumeration, formatted for documentation page.

        The empty string when the class dict has no `"__doc__"` item or its value is |None|.
        """
        docstring = self._clsdict.get("__doc__", "")
        if docstring is None:
            return ""
        return textwrap.dedent(docstring).strip()

    def _member_def(self, member: BaseEnum | BaseXmlEnum):
        """Return an individual member definition formatted as an RST glossary entry,
        wrapped to fit within 78 columns."""
        assert member.__doc__ is not None
        indent = " " * 4
        description = textwrap.fill(
            textwrap.dedent(member.__doc__).strip(),
            width=78,
            initial_indent=indent,
            subsequent_indent=indent,
        )
        return f"{member.name!s}\n{description}\n"

    @property
    def _member_defs(self):
        """A single string containing the aggregated member definitions section of the
        documentation page.

        Items of the `"__members__"` sequence whose `name` is |None| are left out.
        """
        return "\n".join(
            self._member_def(member)
            for member in self._clsdict["__members__"]
            if member.name is not None
        )

    @property
    def _ms_name(self):
        """The Microsoft API name for this enumeration."""
        return self._clsdict["__ms_name__"]

    @property
    def _page_title(self):
        """The title for the documentation page, formatted as code (surrounded in
        double-backticks) and underlined with '=' characters."""
        # -- the underline spans the name plus its four surrounding backticks --
        underline = "=" * (len(self._clsname) + 4)
        return f"``{self._clsname!s}``\n{underline}"
|
||||
103
path/to/venv/lib/python3.12/site-packages/docx/enum/dml.py
Normal file
103
path/to/venv/lib/python3.12/site-packages/docx/enum/dml.py
Normal file
@@ -0,0 +1,103 @@
|
||||
"""Enumerations used by DrawingML objects."""
|
||||
|
||||
from .base import BaseEnum, BaseXmlEnum
|
||||
|
||||
|
||||
class MSO_COLOR_TYPE(BaseEnum):
    """Specifies the color specification scheme.

    Example::

        from docx.enum.dml import MSO_COLOR_TYPE

        assert font.color.type == MSO_COLOR_TYPE.THEME

    MS API name: `MsoColorType`

    http://msdn.microsoft.com/en-us/library/office/ff864912(v=office.15).aspx
    """

    # -- each member is declared as `(ms_api_value, docstr)` --

    RGB = (1, "Color is specified by an |RGBColor| value.")
    """Color is specified by an |RGBColor| value."""

    THEME = (2, "Color is one of the preset theme colors.")
    """Color is one of the preset theme colors."""

    AUTO = (101, "Color is determined automatically by the application.")
    """Color is determined automatically by the application."""
|
||||
|
||||
|
||||
class MSO_THEME_COLOR_INDEX(BaseXmlEnum):
    """Indicates the Office theme color, one of those shown in the color gallery on the
    formatting ribbon.

    Alias: ``MSO_THEME_COLOR``

    Example::

        from docx.enum.dml import MSO_THEME_COLOR

        font.color.theme_color = MSO_THEME_COLOR.ACCENT_1

    MS API name: `MsoThemeColorIndex`

    http://msdn.microsoft.com/en-us/library/office/ff860782(v=office.15).aspx
    """

    # -- each member is declared as `(ms_api_value, xml_value, docstr)` --

    NOT_THEME_COLOR = (0, "UNMAPPED", "Indicates the color is not a theme color.")
    """Indicates the color is not a theme color."""

    ACCENT_1 = (5, "accent1", "Specifies the Accent 1 theme color.")
    """Specifies the Accent 1 theme color."""

    ACCENT_2 = (6, "accent2", "Specifies the Accent 2 theme color.")
    """Specifies the Accent 2 theme color."""

    ACCENT_3 = (7, "accent3", "Specifies the Accent 3 theme color.")
    """Specifies the Accent 3 theme color."""

    ACCENT_4 = (8, "accent4", "Specifies the Accent 4 theme color.")
    """Specifies the Accent 4 theme color."""

    ACCENT_5 = (9, "accent5", "Specifies the Accent 5 theme color.")
    """Specifies the Accent 5 theme color."""

    ACCENT_6 = (10, "accent6", "Specifies the Accent 6 theme color.")
    """Specifies the Accent 6 theme color."""

    BACKGROUND_1 = (14, "background1", "Specifies the Background 1 theme color.")
    """Specifies the Background 1 theme color."""

    BACKGROUND_2 = (16, "background2", "Specifies the Background 2 theme color.")
    """Specifies the Background 2 theme color."""

    DARK_1 = (1, "dark1", "Specifies the Dark 1 theme color.")
    """Specifies the Dark 1 theme color."""

    DARK_2 = (3, "dark2", "Specifies the Dark 2 theme color.")
    """Specifies the Dark 2 theme color."""

    FOLLOWED_HYPERLINK = (
        12,
        "followedHyperlink",
        "Specifies the theme color for a clicked hyperlink.",
    )
    """Specifies the theme color for a clicked hyperlink."""

    HYPERLINK = (11, "hyperlink", "Specifies the theme color for a hyperlink.")
    """Specifies the theme color for a hyperlink."""

    LIGHT_1 = (2, "light1", "Specifies the Light 1 theme color.")
    """Specifies the Light 1 theme color."""

    LIGHT_2 = (4, "light2", "Specifies the Light 2 theme color.")
    """Specifies the Light 2 theme color."""

    TEXT_1 = (13, "text1", "Specifies the Text 1 theme color.")
    """Specifies the Text 1 theme color."""

    TEXT_2 = (15, "text2", "Specifies the Text 2 theme color.")
    """Specifies the Text 2 theme color."""


# -- shorter alias used in the docstring example above --
MSO_THEME_COLOR = MSO_THEME_COLOR_INDEX
|
||||
@@ -0,0 +1,86 @@
|
||||
"""Enumerations related to the main document in WordprocessingML files."""
|
||||
|
||||
from .base import BaseXmlEnum
|
||||
|
||||
|
||||
class WD_HEADER_FOOTER_INDEX(BaseXmlEnum):
    """Alias: **WD_HEADER_FOOTER**

    Specifies one of the three possible header/footer definitions for a section.

    For internal use only; not part of the python-docx API.

    MS API name: `WdHeaderFooterIndex`
    URL: https://docs.microsoft.com/en-us/office/vba/api/word.wdheaderfooterindex
    """

    # -- each member is declared as `(ms_api_value, xml_value, docstr)` --

    PRIMARY = (1, "default", "Header for odd pages or all if no even header.")
    """Header for odd pages or all if no even header."""

    FIRST_PAGE = (2, "first", "Header for first page of section.")
    """Header for first page of section."""

    EVEN_PAGE = (3, "even", "Header for even pages of recto/verso section.")
    """Header for even pages of recto/verso section."""


# -- shorter alias named in the class docstring --
WD_HEADER_FOOTER = WD_HEADER_FOOTER_INDEX
|
||||
|
||||
|
||||
class WD_ORIENTATION(BaseXmlEnum):
    """Alias: **WD_ORIENT**

    Specifies the page layout orientation.

    Example::

        from docx.enum.section import WD_ORIENT

        section = document.sections[-1]
        section.orientation = WD_ORIENT.LANDSCAPE

    MS API name: `WdOrientation`
    MS API URL: http://msdn.microsoft.com/en-us/library/office/ff837902.aspx
    """

    # -- each member is declared as `(ms_api_value, xml_value, docstr)` --

    PORTRAIT = (0, "portrait", "Portrait orientation.")
    """Portrait orientation."""

    LANDSCAPE = (1, "landscape", "Landscape orientation.")
    """Landscape orientation."""


# -- shorter alias named in the class docstring --
WD_ORIENT = WD_ORIENTATION
|
||||
|
||||
|
||||
class WD_SECTION_START(BaseXmlEnum):
    """Alias: **WD_SECTION**

    Specifies the start type of a section break.

    Example::

        from docx.enum.section import WD_SECTION

        section = document.sections[0]
        section.start_type = WD_SECTION.NEW_PAGE

    MS API name: `WdSectionStart`
    MS API URL: http://msdn.microsoft.com/en-us/library/office/ff840975.aspx
    """

    # -- each member is declared as `(ms_api_value, xml_value, docstr)` --

    CONTINUOUS = (0, "continuous", "Continuous section break.")
    """Continuous section break."""

    NEW_COLUMN = (1, "nextColumn", "New column section break.")
    """New column section break."""

    NEW_PAGE = (2, "nextPage", "New page section break.")
    """New page section break."""

    EVEN_PAGE = (3, "evenPage", "Even pages section break.")
    """Even pages section break."""

    ODD_PAGE = (4, "oddPage", "Section begins on next odd page.")
    """Section begins on next odd page."""


# -- shorter alias named in the class docstring --
WD_SECTION = WD_SECTION_START
|
||||
19
path/to/venv/lib/python3.12/site-packages/docx/enum/shape.py
Normal file
19
path/to/venv/lib/python3.12/site-packages/docx/enum/shape.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""Enumerations related to DrawingML shapes in WordprocessingML files."""
|
||||
|
||||
import enum
|
||||
|
||||
|
||||
class WD_INLINE_SHAPE_TYPE(enum.Enum):
    """Alias: **WD_INLINE_SHAPE**

    Corresponds to WdInlineShapeType enumeration.

    http://msdn.microsoft.com/en-us/library/office/ff192587.aspx
    """

    # -- plain `enum.Enum` members; each value is a bare integer --
    CHART = 12
    LINKED_PICTURE = 4
    PICTURE = 3
    SMART_ART = 15
    NOT_IMPLEMENTED = -6


# -- shorter alias for the same enumeration --
WD_INLINE_SHAPE = WD_INLINE_SHAPE_TYPE
|
||||
452
path/to/venv/lib/python3.12/site-packages/docx/enum/style.py
Normal file
452
path/to/venv/lib/python3.12/site-packages/docx/enum/style.py
Normal file
@@ -0,0 +1,452 @@
|
||||
"""Enumerations related to styles."""
|
||||
|
||||
from .base import BaseEnum, BaseXmlEnum
|
||||
|
||||
|
||||
class WD_BUILTIN_STYLE(BaseEnum):
    """Alias: **WD_STYLE**

    Specifies a built-in Microsoft Word style.

    Example::

        from docx import Document
        from docx.enum.style import WD_STYLE

        document = Document()
        styles = document.styles
        style = styles[WD_STYLE.BODY_TEXT]

    MS API name: `WdBuiltinStyle`

    http://msdn.microsoft.com/en-us/library/office/ff835210.aspx
    """

    # -- each member is declared as `(ms_api_value, docstr)`; all values are negative --

    BLOCK_QUOTATION = (-85, "Block Text.")
    """Block Text."""

    BODY_TEXT = (-67, "Body Text.")
    """Body Text."""

    BODY_TEXT_2 = (-81, "Body Text 2.")
    """Body Text 2."""

    BODY_TEXT_3 = (-82, "Body Text 3.")
    """Body Text 3."""

    BODY_TEXT_FIRST_INDENT = (-78, "Body Text First Indent.")
    """Body Text First Indent."""

    BODY_TEXT_FIRST_INDENT_2 = (-79, "Body Text First Indent 2.")
    """Body Text First Indent 2."""

    BODY_TEXT_INDENT = (-68, "Body Text Indent.")
    """Body Text Indent."""

    BODY_TEXT_INDENT_2 = (-83, "Body Text Indent 2.")
    """Body Text Indent 2."""

    BODY_TEXT_INDENT_3 = (-84, "Body Text Indent 3.")
    """Body Text Indent 3."""

    BOOK_TITLE = (-265, "Book Title.")
    """Book Title."""

    CAPTION = (-35, "Caption.")
    """Caption."""

    CLOSING = (-64, "Closing.")
    """Closing."""

    COMMENT_REFERENCE = (-40, "Comment Reference.")
    """Comment Reference."""

    COMMENT_TEXT = (-31, "Comment Text.")
    """Comment Text."""

    DATE = (-77, "Date.")
    """Date."""

    DEFAULT_PARAGRAPH_FONT = (-66, "Default Paragraph Font.")
    """Default Paragraph Font."""

    EMPHASIS = (-89, "Emphasis.")
    """Emphasis."""

    ENDNOTE_REFERENCE = (-43, "Endnote Reference.")
    """Endnote Reference."""

    ENDNOTE_TEXT = (-44, "Endnote Text.")
    """Endnote Text."""

    ENVELOPE_ADDRESS = (-37, "Envelope Address.")
    """Envelope Address."""

    ENVELOPE_RETURN = (-38, "Envelope Return.")
    """Envelope Return."""

    FOOTER = (-33, "Footer.")
    """Footer."""

    FOOTNOTE_REFERENCE = (-39, "Footnote Reference.")
    """Footnote Reference."""

    FOOTNOTE_TEXT = (-30, "Footnote Text.")
    """Footnote Text."""

    HEADER = (-32, "Header.")
    """Header."""

    HEADING_1 = (-2, "Heading 1.")
    """Heading 1."""

    HEADING_2 = (-3, "Heading 2.")
    """Heading 2."""

    HEADING_3 = (-4, "Heading 3.")
    """Heading 3."""

    HEADING_4 = (-5, "Heading 4.")
    """Heading 4."""

    HEADING_5 = (-6, "Heading 5.")
    """Heading 5."""

    HEADING_6 = (-7, "Heading 6.")
    """Heading 6."""

    HEADING_7 = (-8, "Heading 7.")
    """Heading 7."""

    HEADING_8 = (-9, "Heading 8.")
    """Heading 8."""

    HEADING_9 = (-10, "Heading 9.")
    """Heading 9."""

    HTML_ACRONYM = (-96, "HTML Acronym.")
    """HTML Acronym."""

    HTML_ADDRESS = (-97, "HTML Address.")
    """HTML Address."""

    HTML_CITE = (-98, "HTML Cite.")
    """HTML Cite."""

    HTML_CODE = (-99, "HTML Code.")
    """HTML Code."""

    HTML_DFN = (-100, "HTML Definition.")
    """HTML Definition."""

    HTML_KBD = (-101, "HTML Keyboard.")
    """HTML Keyboard."""

    HTML_NORMAL = (-95, "Normal (Web).")
    """Normal (Web)."""

    HTML_PRE = (-102, "HTML Preformatted.")
    """HTML Preformatted."""

    HTML_SAMP = (-103, "HTML Sample.")
    """HTML Sample."""

    HTML_TT = (-104, "HTML Typewriter.")
    """HTML Typewriter."""

    HTML_VAR = (-105, "HTML Variable.")
    """HTML Variable."""

    HYPERLINK = (-86, "Hyperlink.")
    """Hyperlink."""

    HYPERLINK_FOLLOWED = (-87, "Followed Hyperlink.")
    """Followed Hyperlink."""

    INDEX_1 = (-11, "Index 1.")
    """Index 1."""

    INDEX_2 = (-12, "Index 2.")
    """Index 2."""

    INDEX_3 = (-13, "Index 3.")
    """Index 3."""

    INDEX_4 = (-14, "Index 4.")
    """Index 4."""

    INDEX_5 = (-15, "Index 5.")
    """Index 5."""

    INDEX_6 = (-16, "Index 6.")
    """Index 6."""

    INDEX_7 = (-17, "Index 7.")
    """Index 7."""

    INDEX_8 = (-18, "Index 8.")
    """Index 8."""

    INDEX_9 = (-19, "Index 9.")
    """Index 9."""

    INDEX_HEADING = (-34, "Index Heading")
    """Index Heading"""

    INTENSE_EMPHASIS = (-262, "Intense Emphasis.")
    """Intense Emphasis."""

    INTENSE_QUOTE = (-182, "Intense Quote.")
    """Intense Quote."""

    INTENSE_REFERENCE = (-264, "Intense Reference.")
    """Intense Reference."""

    LINE_NUMBER = (-41, "Line Number.")
    """Line Number."""

    LIST = (-48, "List.")
    """List."""

    LIST_2 = (-51, "List 2.")
    """List 2."""

    LIST_3 = (-52, "List 3.")
    """List 3."""

    LIST_4 = (-53, "List 4.")
    """List 4."""

    LIST_5 = (-54, "List 5.")
    """List 5."""

    LIST_BULLET = (-49, "List Bullet.")
    """List Bullet."""

    LIST_BULLET_2 = (-55, "List Bullet 2.")
    """List Bullet 2."""

    LIST_BULLET_3 = (-56, "List Bullet 3.")
    """List Bullet 3."""

    LIST_BULLET_4 = (-57, "List Bullet 4.")
    """List Bullet 4."""

    LIST_BULLET_5 = (-58, "List Bullet 5.")
    """List Bullet 5."""

    LIST_CONTINUE = (-69, "List Continue.")
    """List Continue."""

    LIST_CONTINUE_2 = (-70, "List Continue 2.")
    """List Continue 2."""

    LIST_CONTINUE_3 = (-71, "List Continue 3.")
    """List Continue 3."""

    LIST_CONTINUE_4 = (-72, "List Continue 4.")
    """List Continue 4."""

    LIST_CONTINUE_5 = (-73, "List Continue 5.")
    """List Continue 5."""

    LIST_NUMBER = (-50, "List Number.")
    """List Number."""

    LIST_NUMBER_2 = (-59, "List Number 2.")
    """List Number 2."""

    LIST_NUMBER_3 = (-60, "List Number 3.")
    """List Number 3."""

    LIST_NUMBER_4 = (-61, "List Number 4.")
    """List Number 4."""

    LIST_NUMBER_5 = (-62, "List Number 5.")
    """List Number 5."""

    LIST_PARAGRAPH = (-180, "List Paragraph.")
    """List Paragraph."""

    MACRO_TEXT = (-46, "Macro Text.")
    """Macro Text."""

    MESSAGE_HEADER = (-74, "Message Header.")
    """Message Header."""

    NAV_PANE = (-90, "Document Map.")
    """Document Map."""

    NORMAL = (-1, "Normal.")
    """Normal."""

    NORMAL_INDENT = (-29, "Normal Indent.")
    """Normal Indent."""

    NORMAL_OBJECT = (-158, "Normal (applied to an object).")
    """Normal (applied to an object)."""

    NORMAL_TABLE = (-106, "Normal (applied within a table).")
    """Normal (applied within a table)."""

    NOTE_HEADING = (-80, "Note Heading.")
    """Note Heading."""

    PAGE_NUMBER = (-42, "Page Number.")
    """Page Number."""

    PLAIN_TEXT = (-91, "Plain Text.")
    """Plain Text."""

    QUOTE = (-181, "Quote.")
    """Quote."""

    SALUTATION = (-76, "Salutation.")
    """Salutation."""

    SIGNATURE = (-65, "Signature.")
    """Signature."""

    STRONG = (-88, "Strong.")
    """Strong."""

    SUBTITLE = (-75, "Subtitle.")
    """Subtitle."""

    SUBTLE_EMPHASIS = (-261, "Subtle Emphasis.")
    """Subtle Emphasis."""

    SUBTLE_REFERENCE = (-263, "Subtle Reference.")
    """Subtle Reference."""

    TABLE_COLORFUL_GRID = (-172, "Colorful Grid.")
    """Colorful Grid."""

    TABLE_COLORFUL_LIST = (-171, "Colorful List.")
    """Colorful List."""

    TABLE_COLORFUL_SHADING = (-170, "Colorful Shading.")
    """Colorful Shading."""

    TABLE_DARK_LIST = (-169, "Dark List.")
    """Dark List."""

    TABLE_LIGHT_GRID = (-161, "Light Grid.")
    """Light Grid."""

    TABLE_LIGHT_GRID_ACCENT_1 = (-175, "Light Grid Accent 1.")
    """Light Grid Accent 1."""

    TABLE_LIGHT_LIST = (-160, "Light List.")
    """Light List."""

    TABLE_LIGHT_LIST_ACCENT_1 = (-174, "Light List Accent 1.")
    """Light List Accent 1."""

    TABLE_LIGHT_SHADING = (-159, "Light Shading.")
    """Light Shading."""

    TABLE_LIGHT_SHADING_ACCENT_1 = (-173, "Light Shading Accent 1.")
    """Light Shading Accent 1."""

    TABLE_MEDIUM_GRID_1 = (-166, "Medium Grid 1.")
    """Medium Grid 1."""

    TABLE_MEDIUM_GRID_2 = (-167, "Medium Grid 2.")
    """Medium Grid 2."""

    TABLE_MEDIUM_GRID_3 = (-168, "Medium Grid 3.")
    """Medium Grid 3."""

    TABLE_MEDIUM_LIST_1 = (-164, "Medium List 1.")
    """Medium List 1."""

    TABLE_MEDIUM_LIST_1_ACCENT_1 = (-178, "Medium List 1 Accent 1.")
    """Medium List 1 Accent 1."""

    TABLE_MEDIUM_LIST_2 = (-165, "Medium List 2.")
    """Medium List 2."""

    TABLE_MEDIUM_SHADING_1 = (-162, "Medium Shading 1.")
    """Medium Shading 1."""

    TABLE_MEDIUM_SHADING_1_ACCENT_1 = (-176, "Medium Shading 1 Accent 1.")
    """Medium Shading 1 Accent 1."""

    TABLE_MEDIUM_SHADING_2 = (-163, "Medium Shading 2.")
    """Medium Shading 2."""

    TABLE_MEDIUM_SHADING_2_ACCENT_1 = (-177, "Medium Shading 2 Accent 1.")
    """Medium Shading 2 Accent 1."""

    TABLE_OF_AUTHORITIES = (-45, "Table of Authorities.")
    """Table of Authorities."""

    TABLE_OF_FIGURES = (-36, "Table of Figures.")
    """Table of Figures."""

    TITLE = (-63, "Title.")
    """Title."""

    TOAHEADING = (-47, "TOA Heading.")
    """TOA Heading."""

    TOC_1 = (-20, "TOC 1.")
    """TOC 1."""

    TOC_2 = (-21, "TOC 2.")
    """TOC 2."""

    TOC_3 = (-22, "TOC 3.")
    """TOC 3."""

    TOC_4 = (-23, "TOC 4.")
    """TOC 4."""

    TOC_5 = (-24, "TOC 5.")
    """TOC 5."""

    TOC_6 = (-25, "TOC 6.")
    """TOC 6."""

    TOC_7 = (-26, "TOC 7.")
    """TOC 7."""

    TOC_8 = (-27, "TOC 8.")
    """TOC 8."""

    TOC_9 = (-28, "TOC 9.")
    """TOC 9."""


# -- shorter alias named in the class docstring --
WD_STYLE = WD_BUILTIN_STYLE
|
||||
|
||||
|
||||
class WD_STYLE_TYPE(BaseXmlEnum):
    """Specifies one of the four style types: paragraph, character, list, or table.

    Example::

        from docx import Document
        from docx.enum.style import WD_STYLE_TYPE

        styles = Document().styles
        assert styles[0].type == WD_STYLE_TYPE.PARAGRAPH

    MS API name: `WdStyleType`

    http://msdn.microsoft.com/en-us/library/office/ff196870.aspx
    """

    # -- each member is declared as `(ms_api_value, xml_value, docstr)`; note the XML
    # -- value for `LIST` is "numbering", not "list".

    CHARACTER = (2, "character", "Character style.")
    """Character style."""

    LIST = (4, "numbering", "List style.")
    """List style."""

    PARAGRAPH = (1, "paragraph", "Paragraph style.")
    """Paragraph style."""

    TABLE = (3, "table", "Table style.")
    """Table style."""
|
||||
136
path/to/venv/lib/python3.12/site-packages/docx/enum/table.py
Normal file
136
path/to/venv/lib/python3.12/site-packages/docx/enum/table.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""Enumerations related to tables in WordprocessingML files."""
|
||||
|
||||
from docx.enum.base import BaseEnum, BaseXmlEnum
|
||||
|
||||
|
||||
class WD_CELL_VERTICAL_ALIGNMENT(BaseXmlEnum):
    """Alias: **WD_ALIGN_VERTICAL**

    Specifies the vertical alignment of text in one or more cells of a table.

    Example::

        from docx.enum.table import WD_ALIGN_VERTICAL

        table = document.add_table(3, 3)
        table.cell(0, 0).vertical_alignment = WD_ALIGN_VERTICAL.BOTTOM

    MS API name: `WdCellVerticalAlignment`

    https://msdn.microsoft.com/en-us/library/office/ff193345.aspx
    """

    # -- each member is declared as `(ms_api_value, xml_value, docstr)` --

    TOP = (0, "top", "Text is aligned to the top border of the cell.")
    """Text is aligned to the top border of the cell."""

    CENTER = (1, "center", "Text is aligned to the center of the cell.")
    """Text is aligned to the center of the cell."""

    BOTTOM = (3, "bottom", "Text is aligned to the bottom border of the cell.")
    """Text is aligned to the bottom border of the cell."""

    BOTH = (
        101,
        "both",
        "This is an option in the OpenXml spec, but not in Word itself. It's not"
        " clear what Word behavior this setting produces. If you find out please"
        " let us know and we'll update this documentation. Otherwise, probably best"
        " to avoid this option.",
    )
    """This is an option in the OpenXml spec, but not in Word itself.

    It's not clear what Word behavior this setting produces. If you find out please let
    us know and we'll update this documentation. Otherwise, probably best to avoid this
    option.
    """


# -- shorter alias named in the class docstring --
WD_ALIGN_VERTICAL = WD_CELL_VERTICAL_ALIGNMENT
|
||||
|
||||
|
||||
class WD_ROW_HEIGHT_RULE(BaseXmlEnum):
    """Alias: **WD_ROW_HEIGHT**

    Specifies the rule for determining the height of a table row.

    Example::

        from docx.enum.table import WD_ROW_HEIGHT_RULE

        table = document.add_table(3, 3)
        table.rows[0].height_rule = WD_ROW_HEIGHT_RULE.EXACTLY

    MS API name: `WdRowHeightRule`

    https://msdn.microsoft.com/en-us/library/office/ff193620.aspx
    """

    # -- each member is declared as `(ms_api_value, xml_value, docstr)` --

    AUTO = (
        0,
        "auto",
        "The row height is adjusted to accommodate the tallest value in the row.",
    )
    """The row height is adjusted to accommodate the tallest value in the row."""

    AT_LEAST = (1, "atLeast", "The row height is at least a minimum specified value.")
    """The row height is at least a minimum specified value."""

    EXACTLY = (2, "exact", "The row height is an exact value.")
    """The row height is an exact value."""


# -- shorter alias named in the class docstring --
WD_ROW_HEIGHT = WD_ROW_HEIGHT_RULE
|
||||
|
||||
|
||||
class WD_TABLE_ALIGNMENT(BaseXmlEnum):
    """Specifies table justification type.

    Example::

        from docx.enum.table import WD_TABLE_ALIGNMENT

        table = document.add_table(3, 3)
        table.alignment = WD_TABLE_ALIGNMENT.CENTER

    MS API name: `WdRowAlignment`

    http://office.microsoft.com/en-us/word-help/HV080607259.aspx
    """

    # -- each member is declared as `(ms_api_value, xml_value, docstr)` --

    LEFT = (0, "left", "Left-aligned")
    """Left-aligned"""

    CENTER = (1, "center", "Center-aligned.")
    """Center-aligned."""

    RIGHT = (2, "right", "Right-aligned.")
    """Right-aligned."""
|
||||
|
||||
|
||||
class WD_TABLE_DIRECTION(BaseEnum):
    """Specifies the direction in which an application orders cells in the specified
    table or row.

    Example::

        from docx.enum.table import WD_TABLE_DIRECTION

        table = document.add_table(3, 3)
        table.direction = WD_TABLE_DIRECTION.RTL

    MS API name: `WdTableDirection`

    http://msdn.microsoft.com/en-us/library/ff835141.aspx
    """

    # -- each member is declared as `(ms_api_value, docstr)`; this enum has no XML mapping --

    LTR = (
        0,
        "The table or row is arranged with the first column in the leftmost position.",
    )
    """The table or row is arranged with the first column in the leftmost position."""

    RTL = (
        1,
        "The table or row is arranged with the first column in the rightmost position.",
    )
    """The table or row is arranged with the first column in the rightmost position."""
|
||||
367
path/to/venv/lib/python3.12/site-packages/docx/enum/text.py
Normal file
367
path/to/venv/lib/python3.12/site-packages/docx/enum/text.py
Normal file
@@ -0,0 +1,367 @@
|
||||
"""Enumerations related to text in WordprocessingML files."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import enum
|
||||
|
||||
from docx.enum.base import BaseXmlEnum
|
||||
|
||||
|
||||
class WD_PARAGRAPH_ALIGNMENT(BaseXmlEnum):
    """Alias: **WD_ALIGN_PARAGRAPH**

    Specifies paragraph justification type.

    Example::

        from docx.enum.text import WD_ALIGN_PARAGRAPH

        paragraph = document.add_paragraph()
        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    """

    # -- Each member is an (int, str, str) tuple; presumably MS API value, XML
    # -- attribute value and description -- confirm against `BaseXmlEnum`.
    # -- Note the integer values are not contiguous: 6 is not assigned.

    LEFT = (0, "left", "Left-aligned")
    """Left-aligned"""

    CENTER = (1, "center", "Center-aligned.")
    """Center-aligned."""

    RIGHT = (2, "right", "Right-aligned.")
    """Right-aligned."""

    JUSTIFY = (3, "both", "Fully justified.")
    """Fully justified."""

    DISTRIBUTE = (
        4,
        "distribute",
        "Paragraph characters are distributed to fill entire width of paragraph.",
    )
    """Paragraph characters are distributed to fill entire width of paragraph."""

    JUSTIFY_MED = (
        5,
        "mediumKashida",
        "Justified with a medium character compression ratio.",
    )
    """Justified with a medium character compression ratio."""

    JUSTIFY_HI = (
        7,
        "highKashida",
        "Justified with a high character compression ratio.",
    )
    """Justified with a high character compression ratio."""

    JUSTIFY_LOW = (8, "lowKashida", "Justified with a low character compression ratio.")
    """Justified with a low character compression ratio."""

    THAI_JUSTIFY = (
        9,
        "thaiDistribute",
        "Justified according to Thai formatting layout.",
    )
    """Justified according to Thai formatting layout."""


# -- alternate name bound to the very same enum class --
WD_ALIGN_PARAGRAPH = WD_PARAGRAPH_ALIGNMENT
|
||||
|
||||
|
||||
class WD_BREAK_TYPE(enum.Enum):
    """Corresponds to WdBreakType enumeration.

    http://msdn.microsoft.com/en-us/library/office/ff195905.aspx.
    """

    # -- This is a plain `enum.Enum` with integer values, not a `BaseXmlEnum`, so
    # -- members carry no XML value or description.

    COLUMN = 8
    LINE = 6
    LINE_CLEAR_LEFT = 9
    LINE_CLEAR_RIGHT = 10
    LINE_CLEAR_ALL = 11  # -- added for consistency, not in MS version --
    PAGE = 7
    SECTION_CONTINUOUS = 3
    SECTION_EVEN_PAGE = 4
    SECTION_NEXT_PAGE = 2
    SECTION_ODD_PAGE = 5
    # -- Same value as LINE_CLEAR_ALL above: under `enum.Enum` semantics a repeated
    # -- value makes this name an alias, so `TEXT_WRAPPING is LINE_CLEAR_ALL` and
    # -- iterating the enum does not yield it as a separate member.
    TEXT_WRAPPING = 11


# -- alternate name bound to the very same enum class --
WD_BREAK = WD_BREAK_TYPE
|
||||
|
||||
|
||||
class WD_COLOR_INDEX(BaseXmlEnum):
    """Specifies a standard preset color to apply.

    Used for font highlighting and perhaps other applications.

    * MS API name: `WdColorIndex`
    * URL: https://msdn.microsoft.com/EN-US/library/office/ff195343.aspx
    """

    # -- Each member is an (int, str-or-None, str) tuple; presumably MS API value,
    # -- XML attribute value and description -- confirm against `BaseXmlEnum`.
    # -- Several member names differ from their second element (e.g. BRIGHT_GREEN
    # -- -> "green", GREEN -> "darkGreen", PINK -> "magenta"), so the two are not
    # -- interchangeable.

    # -- the only member whose second element is `None` rather than a string --
    INHERITED = (-1, None, "Color is inherited from the style hierarchy.")
    """Color is inherited from the style hierarchy."""

    AUTO = (0, "default", "Automatic color. Default; usually black.")
    """Automatic color. Default; usually black."""

    BLACK = (1, "black", "Black color.")
    """Black color."""

    BLUE = (2, "blue", "Blue color")
    """Blue color"""

    BRIGHT_GREEN = (4, "green", "Bright green color.")
    """Bright green color."""

    DARK_BLUE = (9, "darkBlue", "Dark blue color.")
    """Dark blue color."""

    DARK_RED = (13, "darkRed", "Dark red color.")
    """Dark red color."""

    DARK_YELLOW = (14, "darkYellow", "Dark yellow color.")
    """Dark yellow color."""

    GRAY_25 = (16, "lightGray", "25% shade of gray color.")
    """25% shade of gray color."""

    GRAY_50 = (15, "darkGray", "50% shade of gray color.")
    """50% shade of gray color."""

    GREEN = (11, "darkGreen", "Green color.")
    """Green color."""

    PINK = (5, "magenta", "Pink color.")
    """Pink color."""

    RED = (6, "red", "Red color.")
    """Red color."""

    TEAL = (10, "darkCyan", "Teal color.")
    """Teal color."""

    TURQUOISE = (3, "cyan", "Turquoise color.")
    """Turquoise color."""

    VIOLET = (12, "darkMagenta", "Violet color.")
    """Violet color."""

    WHITE = (8, "white", "White color.")
    """White color."""

    YELLOW = (7, "yellow", "Yellow color.")
    """Yellow color."""


# -- alternate name bound to the very same enum class --
WD_COLOR = WD_COLOR_INDEX
|
||||
|
||||
|
||||
class WD_LINE_SPACING(BaseXmlEnum):
    """Specifies a line spacing format to be applied to a paragraph.

    Example::

        from docx.enum.text import WD_LINE_SPACING

        paragraph = document.add_paragraph()
        paragraph.line_spacing_rule = WD_LINE_SPACING.EXACTLY


    MS API name: `WdLineSpacing`

    URL: http://msdn.microsoft.com/en-us/library/office/ff844910.aspx
    """

    # -- Each member is an (int, str, str) tuple; presumably MS API value, XML
    # -- attribute value and description -- confirm against `BaseXmlEnum`.
    # -- SINGLE, ONE_POINT_FIVE and DOUBLE all share the placeholder string
    # -- "UNMAPPED" as their second element; presumably they have no XML value of
    # -- their own -- verify how callers translate them.

    SINGLE = (0, "UNMAPPED", "Single spaced (default).")
    """Single spaced (default)."""

    ONE_POINT_FIVE = (1, "UNMAPPED", "Space-and-a-half line spacing.")
    """Space-and-a-half line spacing."""

    DOUBLE = (2, "UNMAPPED", "Double spaced.")
    """Double spaced."""

    AT_LEAST = (
        3,
        "atLeast",
        "Minimum line spacing is specified amount. Amount is specified separately.",
    )
    """Minimum line spacing is specified amount. Amount is specified separately."""

    EXACTLY = (
        4,
        "exact",
        "Line spacing is exactly specified amount. Amount is specified separately.",
    )
    """Line spacing is exactly specified amount. Amount is specified separately."""

    MULTIPLE = (
        5,
        "auto",
        "Line spacing is specified as multiple of line heights. Changing font size"
        " will change line spacing proportionately.",
    )
    """Line spacing is specified as multiple of line heights. Changing font size will
    change the line spacing proportionately."""
|
||||
|
||||
|
||||
class WD_TAB_ALIGNMENT(BaseXmlEnum):
    """Specifies the tab stop alignment to apply.

    MS API name: `WdTabAlignment`

    URL: https://msdn.microsoft.com/EN-US/library/office/ff195609.aspx
    """

    # -- Each member is an (int, str, str) tuple; presumably MS API value, XML
    # -- attribute value and description -- confirm against `BaseXmlEnum`.
    # -- Integer values are not contiguous: 5 is unassigned and the last four
    # -- members use values starting at 101.

    LEFT = (0, "left", "Left-aligned.")
    """Left-aligned."""

    CENTER = (1, "center", "Center-aligned.")
    """Center-aligned."""

    RIGHT = (2, "right", "Right-aligned.")
    """Right-aligned."""

    DECIMAL = (3, "decimal", "Decimal-aligned.")
    """Decimal-aligned."""

    BAR = (4, "bar", "Bar-aligned.")
    """Bar-aligned."""

    LIST = (6, "list", "List-aligned. (deprecated)")
    """List-aligned. (deprecated)"""

    CLEAR = (101, "clear", "Clear an inherited tab stop.")
    """Clear an inherited tab stop."""

    END = (102, "end", "Right-aligned. (deprecated)")
    """Right-aligned. (deprecated)"""

    NUM = (103, "num", "Left-aligned. (deprecated)")
    """Left-aligned. (deprecated)"""

    START = (104, "start", "Left-aligned. (deprecated)")
    """Left-aligned. (deprecated)"""
|
||||
|
||||
|
||||
class WD_TAB_LEADER(BaseXmlEnum):
    """Specifies the character to use as the leader with formatted tabs.

    MS API name: `WdTabLeader`

    URL: https://msdn.microsoft.com/en-us/library/office/ff845050.aspx
    """

    # -- Each member is an (int, str, str) tuple; presumably MS API value, XML
    # -- attribute value and description -- confirm against `BaseXmlEnum`.
    # -- Member names and second elements differ for most entries (e.g. DASHES ->
    # -- "hyphen", LINES -> "underscore").

    SPACES = (0, "none", "Spaces. Default.")
    """Spaces. Default."""

    DOTS = (1, "dot", "Dots.")
    """Dots."""

    DASHES = (2, "hyphen", "Dashes.")
    """Dashes."""

    LINES = (3, "underscore", "Double lines.")
    """Double lines."""

    HEAVY = (4, "heavy", "A heavy line.")
    """A heavy line."""

    MIDDLE_DOT = (5, "middleDot", "A vertically-centered dot.")
    """A vertically-centered dot."""
|
||||
|
||||
|
||||
class WD_UNDERLINE(BaseXmlEnum):
    """Specifies the style of underline applied to a run of characters.

    MS API name: `WdUnderline`

    URL: http://msdn.microsoft.com/en-us/library/office/ff822388.aspx
    """

    # -- Each member is an (int, str-or-None, str) tuple; presumably MS API value,
    # -- XML attribute value and description -- confirm against `BaseXmlEnum`.
    # -- Integer values are sparse (-1 through 55 with many gaps).

    # -- the only member whose second element is `None` rather than a string --
    INHERITED = (-1, None, "Inherit underline setting from containing paragraph.")
    """Inherit underline setting from containing paragraph."""

    NONE = (
        0,
        "none",
        "No underline.\n\nThis setting overrides any inherited underline value, so can"
        " be used to remove underline from a run that inherits underlining from its"
        " containing paragraph. Note this is not the same as assigning |None| to"
        " Run.underline. |None| is a valid assignment value, but causes the run to"
        " inherit its underline value. Assigning `WD_UNDERLINE.NONE` causes"
        " underlining to be unconditionally turned off.",
    )
    """No underline.

    This setting overrides any inherited underline value, so can be used to remove
    underline from a run that inherits underlining from its containing paragraph. Note
    this is not the same as assigning |None| to Run.underline. |None| is a valid
    assignment value, but causes the run to inherit its underline value. Assigning
    ``WD_UNDERLINE.NONE`` causes underlining to be unconditionally turned off.
    """

    SINGLE = (
        1,
        "single",
        "A single line.\n\nNote that this setting is write-only in the sense that"
        " |True| (rather than `WD_UNDERLINE.SINGLE`) is returned for a run having"
        " this setting.",
    )
    """A single line.

    Note that this setting is write-only in the sense that |True|
    (rather than ``WD_UNDERLINE.SINGLE``) is returned for a run having this setting.
    """

    WORDS = (2, "words", "Underline individual words only.")
    """Underline individual words only."""

    DOUBLE = (3, "double", "A double line.")
    """A double line."""

    DOTTED = (4, "dotted", "Dots.")
    """Dots."""

    THICK = (6, "thick", "A single thick line.")
    """A single thick line."""

    DASH = (7, "dash", "Dashes.")
    """Dashes."""

    DOT_DASH = (9, "dotDash", "Alternating dots and dashes.")
    """Alternating dots and dashes."""

    DOT_DOT_DASH = (10, "dotDotDash", "An alternating dot-dot-dash pattern.")
    """An alternating dot-dot-dash pattern."""

    WAVY = (11, "wave", "A single wavy line.")
    """A single wavy line."""

    DOTTED_HEAVY = (20, "dottedHeavy", "Heavy dots.")
    """Heavy dots."""

    DASH_HEAVY = (23, "dashedHeavy", "Heavy dashes.")
    """Heavy dashes."""

    DOT_DASH_HEAVY = (25, "dashDotHeavy", "Alternating heavy dots and heavy dashes.")
    """Alternating heavy dots and heavy dashes."""

    DOT_DOT_DASH_HEAVY = (
        26,
        "dashDotDotHeavy",
        "An alternating heavy dot-dot-dash pattern.",
    )
    """An alternating heavy dot-dot-dash pattern."""

    WAVY_HEAVY = (27, "wavyHeavy", "A heavy wavy line.")
    """A heavy wavy line."""

    DASH_LONG = (39, "dashLong", "Long dashes.")
    """Long dashes."""

    WAVY_DOUBLE = (43, "wavyDouble", "A double wavy line.")
    """A double wavy line."""

    DASH_LONG_HEAVY = (55, "dashLongHeavy", "Long heavy dashes.")
    """Long heavy dashes."""
|
||||
18
path/to/venv/lib/python3.12/site-packages/docx/exceptions.py
Normal file
18
path/to/venv/lib/python3.12/site-packages/docx/exceptions.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""Exceptions used with python-docx.
|
||||
|
||||
The base exception class is PythonDocxError.
|
||||
"""
|
||||
|
||||
|
||||
class PythonDocxError(Exception):
    """Generic error class.

    Serves as the common base for the other exception classes defined in this
    module, so catching it also catches those.
    """
|
||||
|
||||
|
||||
class InvalidSpanError(PythonDocxError):
    """Raised when an invalid merge region is specified in a request to merge table
    cells.

    Subclass of |PythonDocxError|.
    """
|
||||
|
||||
|
||||
class InvalidXmlError(PythonDocxError):
    """Raised when invalid XML is encountered, such as on attempt to access a missing
    required child element.

    Subclass of |PythonDocxError|.
    """
|
||||
@@ -0,0 +1,23 @@
|
||||
"""Provides objects that can characterize image streams.
|
||||
|
||||
That characterization is as to content type and size, as a required step in including
|
||||
them in a document.
|
||||
"""
|
||||
|
||||
from docx.image.bmp import Bmp
|
||||
from docx.image.gif import Gif
|
||||
from docx.image.jpeg import Exif, Jfif
|
||||
from docx.image.png import Png
|
||||
from docx.image.tiff import Tiff
|
||||
|
||||
# -- Table of (header-parser class, byte offset, expected bytes) used to recognize
# -- an image format from the leading bytes of a stream. Entries are tried in the
# -- order listed and the first match wins, so order matters if signatures could
# -- ever overlap. A format may appear more than once (GIF and TIFF each have two
# -- accepted signatures).
SIGNATURES = (
    # class, offset, signature_bytes
    (Png, 0, b"\x89PNG\x0d\x0a\x1a\x0a"),
    # -- JFIF and Exif are both matched at offset 6 rather than at the start --
    (Jfif, 6, b"JFIF"),
    (Exif, 6, b"Exif"),
    (Gif, 0, b"GIF87a"),
    (Gif, 0, b"GIF89a"),
    (Tiff, 0, b"MM\x00*"),  # big-endian (Motorola) TIFF
    (Tiff, 0, b"II*\x00"),  # little-endian (Intel) TIFF
    (Bmp, 0, b"BM"),
)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
43
path/to/venv/lib/python3.12/site-packages/docx/image/bmp.py
Normal file
43
path/to/venv/lib/python3.12/site-packages/docx/image/bmp.py
Normal file
@@ -0,0 +1,43 @@
|
||||
from .constants import MIME_TYPE
|
||||
from .helpers import LITTLE_ENDIAN, StreamReader
|
||||
from .image import BaseImageHeader
|
||||
|
||||
|
||||
class Bmp(BaseImageHeader):
    """Image header parser for BMP images."""

    @classmethod
    def from_stream(cls, stream):
        """Return |Bmp| instance having header properties parsed from the BMP image in
        `stream`.

        Width and height are read from offsets 0x12 and 0x16 and the horizontal and
        vertical pixels-per-meter values from offsets 0x26 and 0x2A, all as
        little-endian 32-bit integers.
        """
        stream_rdr = StreamReader(stream, LITTLE_ENDIAN)

        px_width = stream_rdr.read_long(0x12)
        # -- The BMP height field is a *signed* 32-bit integer; a negative value
        # -- marks a top-down bitmap. `read_long()` decodes it as unsigned, so a
        # -- top-down image would otherwise report a height close to 2**32.
        px_height = cls._abs_int32(stream_rdr.read_long(0x16))

        horz_px_per_meter = stream_rdr.read_long(0x26)
        vert_px_per_meter = stream_rdr.read_long(0x2A)

        horz_dpi = cls._dpi(horz_px_per_meter)
        vert_dpi = cls._dpi(vert_px_per_meter)

        return cls(px_width, px_height, horz_dpi, vert_dpi)

    @property
    def content_type(self):
        """MIME content type for this image, unconditionally `image/bmp` for BMP
        images."""
        return MIME_TYPE.BMP

    @property
    def default_ext(self):
        """Default filename extension, always 'bmp' for BMP images."""
        return "bmp"

    @staticmethod
    def _abs_int32(value):
        """Return the magnitude of `value` reinterpreted as a signed 32-bit integer.

        `value` is an unsigned 32-bit integer. Values with the high bit set are the
        two's-complement encoding of a negative number; its absolute value is
        returned. All other values are returned unchanged.
        """
        if value >= 0x80000000:
            return 0x100000000 - value
        return value

    @staticmethod
    def _dpi(px_per_meter):
        """Return the integer pixels per inch from `px_per_meter`, defaulting to 96 if
        `px_per_meter` is zero."""
        if px_per_meter == 0:
            return 96
        # -- 0.0254 meters per inch --
        return int(round(px_per_meter * 0.0254))
|
||||
@@ -0,0 +1,172 @@
|
||||
"""Constants specific the the image sub-package."""
|
||||
|
||||
|
||||
class JPEG_MARKER_CODE:
    """JPEG marker codes.

    Every code is a single-byte `bytes` value. The class is used purely as a
    namespace; it also provides two grouping tuples, a code-to-name mapping and
    the `is_standalone()` test.
    """

    TEM = b"\x01"
    DHT = b"\xc4"
    DAC = b"\xcc"
    JPG = b"\xc8"

    # -- SOFn codes; note 0xC4, 0xC8 and 0xCC are skipped here because they are
    # -- assigned to DHT, JPG and DAC above --
    SOF0 = b"\xc0"
    SOF1 = b"\xc1"
    SOF2 = b"\xc2"
    SOF3 = b"\xc3"
    SOF5 = b"\xc5"
    SOF6 = b"\xc6"
    SOF7 = b"\xc7"
    SOF9 = b"\xc9"
    SOFA = b"\xca"
    SOFB = b"\xcb"
    SOFD = b"\xcd"
    SOFE = b"\xce"
    SOFF = b"\xcf"

    RST0 = b"\xd0"
    RST1 = b"\xd1"
    RST2 = b"\xd2"
    RST3 = b"\xd3"
    RST4 = b"\xd4"
    RST5 = b"\xd5"
    RST6 = b"\xd6"
    RST7 = b"\xd7"

    SOI = b"\xd8"
    EOI = b"\xd9"
    SOS = b"\xda"
    DQT = b"\xdb"  # Define Quantization Table(s)
    DNL = b"\xdc"
    DRI = b"\xdd"
    DHP = b"\xde"
    EXP = b"\xdf"

    APP0 = b"\xe0"
    APP1 = b"\xe1"
    APP2 = b"\xe2"
    APP3 = b"\xe3"
    APP4 = b"\xe4"
    APP5 = b"\xe5"
    APP6 = b"\xe6"
    APP7 = b"\xe7"
    APP8 = b"\xe8"
    APP9 = b"\xe9"
    APPA = b"\xea"
    APPB = b"\xeb"
    APPC = b"\xec"
    APPD = b"\xed"
    APPE = b"\xee"
    APPF = b"\xef"

    # -- codes for which `is_standalone()` returns True --
    STANDALONE_MARKERS = (TEM, SOI, EOI, RST0, RST1, RST2, RST3, RST4, RST5, RST6, RST7)

    # -- every SOFn code defined above, in ascending order --
    SOF_MARKER_CODES = (
        SOF0,
        SOF1,
        SOF2,
        SOF3,
        SOF5,
        SOF6,
        SOF7,
        SOF9,
        SOFA,
        SOFB,
        SOFD,
        SOFE,
        SOFF,
    )

    # -- display names for a subset of the codes; not every code defined above
    # -- has an entry, so lookups should allow for a missing key --
    marker_names = {
        b"\x00": "UNKNOWN",
        b"\xc0": "SOF0",
        b"\xc2": "SOF2",
        b"\xc4": "DHT",
        b"\xda": "SOS",  # start of scan
        b"\xd8": "SOI",  # start of image
        b"\xd9": "EOI",  # end of image
        b"\xdb": "DQT",
        b"\xe0": "APP0",
        b"\xe1": "APP1",
        b"\xe2": "APP2",
        b"\xed": "APP13",
        b"\xee": "APP14",
    }

    @classmethod
    def is_standalone(cls, marker_code):
        """Return True if `marker_code` is one of `STANDALONE_MARKERS`."""
        return marker_code in cls.STANDALONE_MARKERS
|
||||
|
||||
|
||||
class MIME_TYPE:
    """Image content types.

    Namespace of MIME type strings, one per image format named here.
    """

    BMP = "image/bmp"
    GIF = "image/gif"
    JPEG = "image/jpeg"
    PNG = "image/png"
    TIFF = "image/tiff"
|
||||
|
||||
|
||||
class PNG_CHUNK_TYPE:
    """PNG chunk type names.

    Each attribute is the four-character chunk type as a `str`, spelled with the
    same letter case as the attribute name.
    """

    IHDR = "IHDR"
    pHYs = "pHYs"
    IEND = "IEND"
|
||||
|
||||
|
||||
class TIFF_FLD_TYPE:
    """Field type codes for TIFF Image File Directory (IFD) entries.

    These are the integer codes identifying the data type of an IFD entry, as
    distinct from the tag codes in `TIFF_TAG`.
    """

    BYTE = 1
    ASCII = 2
    SHORT = 3
    LONG = 4
    RATIONAL = 5

    # -- display name for each field type code defined above --
    field_type_names = {
        1: "BYTE",
        2: "ASCII char",
        3: "SHORT",
        4: "LONG",
        5: "RATIONAL",
    }


# -- shorter alternate name bound to the very same class --
TIFF_FLD = TIFF_FLD_TYPE
|
||||
|
||||
|
||||
class TIFF_TAG:
    """Tag codes for TIFF Image File Directory (IFD) entries.

    Only five tags have named constants; `tag_names` maps a larger set of tag
    codes to display names.
    """

    IMAGE_WIDTH = 0x0100
    IMAGE_LENGTH = 0x0101
    X_RESOLUTION = 0x011A
    Y_RESOLUTION = 0x011B
    RESOLUTION_UNIT = 0x0128

    # -- tag code -> display name; a superset of the named constants above --
    tag_names = {
        0x00FE: "NewSubfileType",
        0x0100: "ImageWidth",
        0x0101: "ImageLength",
        0x0102: "BitsPerSample",
        0x0103: "Compression",
        0x0106: "PhotometricInterpretation",
        0x010E: "ImageDescription",
        0x010F: "Make",
        0x0110: "Model",
        0x0111: "StripOffsets",
        0x0112: "Orientation",
        0x0115: "SamplesPerPixel",
        0x0117: "StripByteCounts",
        0x011A: "XResolution",
        0x011B: "YResolution",
        0x011C: "PlanarConfiguration",
        0x0128: "ResolutionUnit",
        0x0131: "Software",
        0x0132: "DateTime",
        0x0213: "YCbCrPositioning",
        0x8769: "ExifTag",
        0x8825: "GPS IFD",
        0xC4A5: "PrintImageMatching",
    }
|
||||
@@ -0,0 +1,13 @@
|
||||
"""Exceptions specific the the image sub-package."""
|
||||
|
||||
|
||||
class InvalidImageStreamError(Exception):
    """The recognized image stream appears to be corrupted.

    Derives directly from `Exception`.
    """
|
||||
|
||||
|
||||
class UnexpectedEndOfFileError(Exception):
    """EOF was unexpectedly encountered while reading an image stream.

    Derives directly from `Exception`.
    """
|
||||
|
||||
|
||||
class UnrecognizedImageError(Exception):
    """The provided image stream could not be recognized.

    Derives directly from `Exception`.
    """
|
||||
38
path/to/venv/lib/python3.12/site-packages/docx/image/gif.py
Normal file
38
path/to/venv/lib/python3.12/site-packages/docx/image/gif.py
Normal file
@@ -0,0 +1,38 @@
|
||||
from struct import Struct
|
||||
|
||||
from .constants import MIME_TYPE
|
||||
from .image import BaseImageHeader
|
||||
|
||||
|
||||
class Gif(BaseImageHeader):
    """Header parser for GIF images.

    GIF carries no resolution (DPI) information, so both the horizontal and the
    vertical DPI are fixed at 72.
    """

    @classmethod
    def from_stream(cls, stream):
        """Build a |Gif| instance from the GIF image contained in `stream`."""
        width, height = cls._dimensions_from_stream(stream)
        return cls(width, height, 72, 72)

    @property
    def content_type(self):
        """MIME content type of this image; always `image/gif`."""
        return MIME_TYPE.GIF

    @property
    def default_ext(self):
        """Default filename extension; always 'gif'."""
        return "gif"

    @classmethod
    def _dimensions_from_stream(cls, stream):
        """Return (px_width, px_height) read from the four bytes at offset 6.

        The bytes are decoded as two little-endian unsigned 16-bit integers.
        """
        stream.seek(6)
        size_bytes = stream.read(4)
        return Struct("<HH").unpack(size_bytes)
|
||||
@@ -0,0 +1,86 @@
|
||||
from struct import Struct
|
||||
|
||||
from .exceptions import UnexpectedEndOfFileError
|
||||
|
||||
BIG_ENDIAN = ">"
LITTLE_ENDIAN = "<"


class StreamReader:
    """Wraps a file-like object to provide access to structured data from a binary file.

    Byte-order is configurable. `base_offset` is added to any base value provided to
    calculate actual location for reads.
    """

    def __init__(self, stream, byte_order, base_offset=0):
        super().__init__()
        self._stream = stream
        # -- normalize: any value other than LITTLE_ENDIAN selects big-endian --
        self._byte_order = LITTLE_ENDIAN if byte_order == LITTLE_ENDIAN else BIG_ENDIAN
        self._base_offset = base_offset

    def read(self, count):
        """Allow pass-through read() call."""
        return self._stream.read(count)

    def read_byte(self, base, offset=0):
        """Return the int value of the byte at the file position defined by
        self._base_offset + `base` + `offset`.

        `base` and `offset` must both be integers; they are summed to compute an
        absolute seek position.
        """
        return self._read_int("B", base, offset)

    def read_long(self, base, offset=0):
        """Return the int value of the four bytes at the file position defined by
        self._base_offset + `base` + `offset`.

        The value is decoded as an unsigned 32-bit integer. The endian setting of
        this instance is used to interpret the byte layout of the long.
        """
        # -- `_byte_order` is always "<" or ">", i.e. a valid struct prefix --
        return self._read_int(self._byte_order + "L", base, offset)

    def read_short(self, base, offset=0):
        """Return the int value of the two bytes at the file position determined by
        `base` and `offset`, similarly to ``read_long()`` above.

        The value is decoded as an unsigned 16-bit integer.
        """
        return self._read_int(self._byte_order + "H", base, offset)

    def read_str(self, char_count, base, offset=0):
        """Return a string containing the `char_count` bytes at the file position
        determined by self._base_offset + `base` + `offset`.

        The bytes are decoded as UTF-8.
        """
        struct = Struct("%ds" % char_count)
        chars = self._unpack_item(struct, base, offset)
        return chars.decode("UTF-8")

    def seek(self, base, offset=0):
        """Position the stream at self._base_offset + `base` + `offset`."""
        location = self._base_offset + base + offset
        self._stream.seek(location)

    def tell(self):
        """Allow pass-through tell() call."""
        return self._stream.tell()

    def _read_bytes(self, byte_count, base, offset):
        """Return exactly `byte_count` bytes read from the computed position.

        Raises |UnexpectedEndOfFileError| when the stream yields fewer bytes than
        requested.
        """
        self.seek(base, offset)
        bytes_ = self._stream.read(byte_count)
        if len(bytes_) < byte_count:
            raise UnexpectedEndOfFileError
        return bytes_

    def _read_int(self, fmt, base, offset):
        """Return the single value unpacked with struct format `fmt`."""
        struct = Struct(fmt)
        return self._unpack_item(struct, base, offset)

    def _unpack_item(self, struct, base, offset):
        """Read `struct.size` bytes and return the first unpacked item."""
        bytes_ = self._read_bytes(struct.size, base, offset)
        return struct.unpack(bytes_)[0]
|
||||
234
path/to/venv/lib/python3.12/site-packages/docx/image/image.py
Normal file
234
path/to/venv/lib/python3.12/site-packages/docx/image/image.py
Normal file
@@ -0,0 +1,234 @@
|
||||
"""Provides objects that can characterize image streams.
|
||||
|
||||
That characterization is as to content type and size, as a required step in including
|
||||
them in a document.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import io
|
||||
import os
|
||||
from typing import IO, Tuple
|
||||
|
||||
from docx.image.exceptions import UnrecognizedImageError
|
||||
from docx.shared import Emu, Inches, Length, lazyproperty
|
||||
|
||||
|
||||
class Image:
    """Graphical image stream such as JPEG, PNG, or GIF with properties and methods
    required by ImagePart."""

    def __init__(self, blob: bytes, filename: str, image_header: BaseImageHeader):
        super().__init__()
        self._blob = blob
        self._filename = filename
        self._image_header = image_header

    @classmethod
    def from_blob(cls, blob: bytes) -> Image:
        """Return a new |Image| subclass instance parsed from the image binary contained
        in `blob`."""
        stream = io.BytesIO(blob)
        return cls._from_stream(stream, blob)

    @classmethod
    def from_file(cls, image_descriptor: str | os.PathLike[str] | IO[bytes]):
        """Return a new |Image| subclass instance loaded from the image file identified
        by `image_descriptor`, a path or file-like object.

        A path may be given as a `str` or as any `os.PathLike` object such as
        `pathlib.Path`. Anything else is treated as a file-like object opened for
        binary reading; it is rewound to its start and read in full.
        """
        if isinstance(image_descriptor, (str, os.PathLike)):
            path = os.fspath(image_descriptor)
            with open(path, "rb") as f:
                blob = f.read()
            stream = io.BytesIO(blob)
            filename = os.path.basename(path)
        else:
            stream = image_descriptor
            stream.seek(0)
            blob = stream.read()
            # -- `_from_stream()` substitutes a generic filename for None --
            filename = None
        return cls._from_stream(stream, blob, filename)

    @property
    def blob(self):
        """The bytes of the image 'file'."""
        return self._blob

    @property
    def content_type(self) -> str:
        """MIME content type for this image, e.g. ``'image/jpeg'`` for a JPEG image."""
        return self._image_header.content_type

    @lazyproperty
    def ext(self):
        """The file extension for the image.

        If an actual one is available from a load filename it is used. Otherwise a
        canonical extension is assigned based on the content type. Does not contain the
        leading period, e.g. 'jpg', not '.jpg'.
        """
        ext = os.path.splitext(self._filename)[1][1:]
        # -- a filename like "photo" has no extension; fall back to the canonical
        # -- one for the detected format rather than returning an empty string --
        return ext or self._image_header.default_ext

    @property
    def filename(self):
        """Original image file name, if loaded from disk, or a generic filename if
        loaded from an anonymous stream."""
        return self._filename

    @property
    def px_width(self) -> int:
        """The horizontal pixel dimension of the image."""
        return self._image_header.px_width

    @property
    def px_height(self) -> int:
        """The vertical pixel dimension of the image."""
        return self._image_header.px_height

    @property
    def horz_dpi(self) -> int:
        """Integer dots per inch for the width of this image.

        The value is whatever the format-specific header parser determined; the
        default used when the file carries no value is chosen by that parser.
        """
        return self._image_header.horz_dpi

    @property
    def vert_dpi(self) -> int:
        """Integer dots per inch for the height of this image.

        The value is whatever the format-specific header parser determined; the
        default used when the file carries no value is chosen by that parser.
        """
        return self._image_header.vert_dpi

    @property
    def width(self) -> Inches:
        """A |Length| value representing the native width of the image, calculated from
        the values of `px_width` and `horz_dpi`."""
        return Inches(self.px_width / self.horz_dpi)

    @property
    def height(self) -> Inches:
        """A |Length| value representing the native height of the image, calculated from
        the values of `px_height` and `vert_dpi`."""
        return Inches(self.px_height / self.vert_dpi)

    def scaled_dimensions(
        self, width: int | Length | None = None, height: int | Length | None = None
    ) -> Tuple[Length, Length]:
        """(cx, cy) pair representing scaled dimensions of this image.

        The native dimensions of the image are scaled by applying the following rules to
        the `width` and `height` arguments.

        * If both `width` and `height` are specified, the return value is (`width`,
          `height`); no scaling is performed.
        * If only one is specified, it is used to compute a scaling factor that is then
          applied to the unspecified dimension, preserving the aspect ratio of the image.
        * If both `width` and `height` are |None|, the native dimensions are returned.

        The native dimensions are calculated from the pixel size and the
        dots-per-inch (dpi) values reported by the image header. The returned values
        are both |Length| objects.
        """
        if width is None and height is None:
            return self.width, self.height

        if width is None:
            assert height is not None
            scaling_factor = float(height) / float(self.height)
            width = round(self.width * scaling_factor)

        if height is None:
            scaling_factor = float(width) / float(self.width)
            height = round(self.height * scaling_factor)

        return Emu(width), Emu(height)

    @lazyproperty
    def sha1(self):
        """SHA1 hash digest of the image blob."""
        return hashlib.sha1(self._blob).hexdigest()

    @classmethod
    def _from_stream(
        cls,
        stream: IO[bytes],
        blob: bytes,
        filename: str | None = None,
    ) -> Image:
        """Return an instance of the |Image| subclass corresponding to the format of the
        image in `stream`.

        When `filename` is None a generic name of the form ``image.<ext>`` is used,
        where ``<ext>`` is the default extension reported by the image header.
        """
        image_header = _ImageHeaderFactory(stream)
        if filename is None:
            filename = "image.%s" % image_header.default_ext
        return cls(blob, filename, image_header)
|
||||
|
||||
|
||||
def _ImageHeaderFactory(stream: IO[bytes]):
    """Return the |BaseImageHeader| subclass instance matching the image in `stream`.

    The first 32 bytes of the stream are compared against each entry of
    `SIGNATURES` in order; the first class whose signature matches parses the
    stream. |UnrecognizedImageError| is raised when no signature matches.
    """
    from docx.image import SIGNATURES

    stream.seek(0)
    leading_bytes = stream.read(32)

    for header_cls, start, magic in SIGNATURES:
        if leading_bytes[start : start + len(magic)] == magic:
            return header_cls.from_stream(stream)

    raise UnrecognizedImageError
|
||||
|
||||
|
||||
class BaseImageHeader:
    """Common base for format-specific image header classes such as |Jpeg| and |Tiff|.

    Stores pixel dimensions and resolution; subclasses supply `content_type` and
    `default_ext`.
    """

    def __init__(self, px_width: int, px_height: int, horz_dpi: int, vert_dpi: int):
        self._px_width, self._px_height = px_width, px_height
        self._horz_dpi, self._vert_dpi = horz_dpi, vert_dpi

    @property
    def content_type(self) -> str:
        """Abstract property; every subclass must provide its own implementation."""
        raise NotImplementedError(
            "content_type property must be implemented by all subclasses of BaseImageHeader"
        )

    @property
    def default_ext(self) -> str:
        """Default filename extension for images of this type.

        Abstract property; every subclass must provide its own implementation.
        """
        msg = "default_ext property must be implemented by all subclasses of BaseImageHeader"
        raise NotImplementedError(msg)

    @property
    def px_width(self):
        """Horizontal size of the image in pixels."""
        return self._px_width

    @property
    def px_height(self):
        """Vertical size of the image in pixels."""
        return self._px_height

    @property
    def horz_dpi(self):
        """Integer dots per inch along the width, as passed to the constructor."""
        return self._horz_dpi

    @property
    def vert_dpi(self):
        """Integer dots per inch along the height, as passed to the constructor."""
        return self._vert_dpi
|
||||
425
path/to/venv/lib/python3.12/site-packages/docx/image/jpeg.py
Normal file
425
path/to/venv/lib/python3.12/site-packages/docx/image/jpeg.py
Normal file
@@ -0,0 +1,425 @@
|
||||
"""Objects related to parsing headers of JPEG image streams.
|
||||
|
||||
Includes both JFIF and Exif sub-formats.
|
||||
"""
|
||||
|
||||
import io
|
||||
|
||||
from docx.image.constants import JPEG_MARKER_CODE, MIME_TYPE
|
||||
from docx.image.helpers import BIG_ENDIAN, StreamReader
|
||||
from docx.image.image import BaseImageHeader
|
||||
from docx.image.tiff import Tiff
|
||||
|
||||
|
||||
class Jpeg(BaseImageHeader):
    """Shared base of the |Jfif| and |Exif| header classes.

    Supplies the two properties that are the same for every JPEG flavour.
    """

    @property
    def content_type(self):
        """MIME content type for this image, always `MIME_TYPE.JPEG`."""
        return MIME_TYPE.JPEG

    @property
    def default_ext(self):
        """Default filename extension for JPEG images, always 'jpg'."""
        return "jpg"
|
||||
|
||||
|
||||
class Exif(Jpeg):
    """Image header parser for the Exif flavour of JPEG."""

    @classmethod
    def from_stream(cls, stream):
        """Return an |Exif| instance populated from the Exif image in `stream`.

        Pixel dimensions are taken from the first SOFn marker and the resolution
        from the first APP1 marker. A `KeyError` from the marker lookup propagates
        when either marker is missing.
        """
        markers = _JfifMarkers.from_stream(stream)
        frame, app1 = markers.sof, markers.app1
        return cls(frame.px_width, frame.px_height, app1.horz_dpi, app1.vert_dpi)
|
||||
|
||||
|
||||
class Jfif(Jpeg):
    """Image header parser for the JFIF flavour of JPEG."""

    @classmethod
    def from_stream(cls, stream):
        """Return a |Jfif| instance populated from the JFIF image in `stream`.

        Pixel dimensions are taken from the first SOFn marker and the resolution
        from the first APP0 marker. A `KeyError` from the marker lookup propagates
        when either marker is missing.
        """
        markers = _JfifMarkers.from_stream(stream)
        frame, app0 = markers.sof, markers.app0
        return cls(frame.px_width, frame.px_height, app0.horz_dpi, app0.vert_dpi)
|
||||
|
||||
|
||||
class _JfifMarkers:
|
||||
"""Sequence of markers in a JPEG file, perhaps truncated at first SOS marker for
|
||||
performance reasons."""
|
||||
|
||||
def __init__(self, markers):
|
||||
super(_JfifMarkers, self).__init__()
|
||||
self._markers = list(markers)
|
||||
|
||||
def __str__(self): # pragma: no cover
|
||||
"""Returns a tabular listing of the markers in this instance, which can be handy
|
||||
for debugging and perhaps other uses."""
|
||||
header = " offset seglen mc name\n======= ====== == ====="
|
||||
tmpl = "%7d %6d %02X %s"
|
||||
rows = []
|
||||
for marker in self._markers:
|
||||
rows.append(
|
||||
tmpl
|
||||
% (
|
||||
marker.offset,
|
||||
marker.segment_length,
|
||||
ord(marker.marker_code),
|
||||
marker.name,
|
||||
)
|
||||
)
|
||||
lines = [header] + rows
|
||||
return "\n".join(lines)
|
||||
|
||||
@classmethod
|
||||
def from_stream(cls, stream):
|
||||
"""Return a |_JfifMarkers| instance containing a |_JfifMarker| subclass instance
|
||||
for each marker in `stream`."""
|
||||
marker_parser = _MarkerParser.from_stream(stream)
|
||||
markers = []
|
||||
for marker in marker_parser.iter_markers():
|
||||
markers.append(marker)
|
||||
if marker.marker_code == JPEG_MARKER_CODE.SOS:
|
||||
break
|
||||
return cls(markers)
|
||||
|
||||
@property
|
||||
def app0(self):
|
||||
"""First APP0 marker in image markers."""
|
||||
for m in self._markers:
|
||||
if m.marker_code == JPEG_MARKER_CODE.APP0:
|
||||
return m
|
||||
raise KeyError("no APP0 marker in image")
|
||||
|
||||
@property
|
||||
def app1(self):
|
||||
"""First APP1 marker in image markers."""
|
||||
for m in self._markers:
|
||||
if m.marker_code == JPEG_MARKER_CODE.APP1:
|
||||
return m
|
||||
raise KeyError("no APP1 marker in image")
|
||||
|
||||
@property
|
||||
def sof(self):
|
||||
"""First start of frame (SOFn) marker in this sequence."""
|
||||
for m in self._markers:
|
||||
if m.marker_code in JPEG_MARKER_CODE.SOF_MARKER_CODES:
|
||||
return m
|
||||
raise KeyError("no start of frame (SOFn) marker in image")
|
||||
|
||||
|
||||
class _MarkerParser:
    """Service class that walks a JFIF stream and produces its markers in order."""

    def __init__(self, stream_reader):
        super().__init__()
        self._stream = stream_reader

    @classmethod
    def from_stream(cls, stream):
        """Return a |_MarkerParser| reading `stream` through a big-endian reader."""
        return cls(StreamReader(stream, BIG_ENDIAN))

    def iter_markers(self):
        """Generate a marker object for each marker in the stream, in stream order.

        Each item is whatever `_MarkerFactory()` returns for the located marker.
        Generation ends after the EOI marker has been yielded. The search for each
        following marker starts just past the previous marker's segment.
        """
        finder = _MarkerFinder.from_stream(self._stream)
        search_from = 0
        while True:
            code, seg_offset = finder.next(search_from)
            marker = _MarkerFactory(code, self._stream, seg_offset)
            yield marker
            if code == JPEG_MARKER_CODE.EOI:
                return
            search_from = seg_offset + marker.segment_length
|
||||
|
||||
|
||||
class _MarkerFinder:
|
||||
"""Service class that knows how to find the next JFIF marker in a stream."""
|
||||
|
||||
def __init__(self, stream):
|
||||
super(_MarkerFinder, self).__init__()
|
||||
self._stream = stream
|
||||
|
||||
@classmethod
|
||||
def from_stream(cls, stream):
|
||||
"""Return a |_MarkerFinder| instance to find JFIF markers in `stream`."""
|
||||
return cls(stream)
|
||||
|
||||
def next(self, start):
|
||||
"""Return a (marker_code, segment_offset) 2-tuple identifying and locating the
|
||||
first marker in `stream` occuring after offset `start`.
|
||||
|
||||
The returned `segment_offset` points to the position immediately following the
|
||||
2-byte marker code, the start of the marker segment, for those markers that have
|
||||
a segment.
|
||||
"""
|
||||
position = start
|
||||
while True:
|
||||
# skip over any non-\xFF bytes
|
||||
position = self._offset_of_next_ff_byte(start=position)
|
||||
# skip over any \xFF padding bytes
|
||||
position, byte_ = self._next_non_ff_byte(start=position + 1)
|
||||
# 'FF 00' sequence is not a marker, start over if found
|
||||
if byte_ == b"\x00":
|
||||
continue
|
||||
# this is a marker, gather return values and break out of scan
|
||||
marker_code, segment_offset = byte_, position + 1
|
||||
break
|
||||
return marker_code, segment_offset
|
||||
|
||||
def _next_non_ff_byte(self, start):
|
||||
"""Return an offset, byte 2-tuple for the next byte in `stream` that is not
|
||||
'\xff', starting with the byte at offset `start`.
|
||||
|
||||
If the byte at offset `start` is not '\xff', `start` and the returned `offset`
|
||||
will be the same.
|
||||
"""
|
||||
self._stream.seek(start)
|
||||
byte_ = self._read_byte()
|
||||
while byte_ == b"\xff":
|
||||
byte_ = self._read_byte()
|
||||
offset_of_non_ff_byte = self._stream.tell() - 1
|
||||
return offset_of_non_ff_byte, byte_
|
||||
|
||||
def _offset_of_next_ff_byte(self, start):
|
||||
"""Return the offset of the next '\xff' byte in `stream` starting with the byte
|
||||
at offset `start`.
|
||||
|
||||
Returns `start` if the byte at that offset is a hex 255; it does not necessarily
|
||||
advance in the stream.
|
||||
"""
|
||||
self._stream.seek(start)
|
||||
byte_ = self._read_byte()
|
||||
while byte_ != b"\xff":
|
||||
byte_ = self._read_byte()
|
||||
offset_of_ff_byte = self._stream.tell() - 1
|
||||
return offset_of_ff_byte
|
||||
|
||||
def _read_byte(self):
|
||||
"""Return the next byte read from stream.
|
||||
|
||||
Raise Exception if stream is at end of file.
|
||||
"""
|
||||
byte_ = self._stream.read(1)
|
||||
if not byte_: # pragma: no cover
|
||||
raise Exception("unexpected end of file")
|
||||
return byte_
|
||||
|
||||
|
||||
def _MarkerFactory(marker_code, stream, offset):
    """Return the marker object for the marker with `marker_code` at `offset` in `stream`.

    APP0, APP1 and SOFn markers get their dedicated classes; every other marker
    code is represented by the generic |_Marker|. The chosen class builds the
    instance through its `from_stream()` constructor.
    """
    if marker_code == JPEG_MARKER_CODE.APP0:
        return _App0Marker.from_stream(stream, marker_code, offset)
    if marker_code == JPEG_MARKER_CODE.APP1:
        return _App1Marker.from_stream(stream, marker_code, offset)
    if marker_code in JPEG_MARKER_CODE.SOF_MARKER_CODES:
        return _SofMarker.from_stream(stream, marker_code, offset)
    return _Marker.from_stream(stream, marker_code, offset)
|
||||
|
||||
|
||||
class _Marker:
|
||||
"""Base class for JFIF marker classes.
|
||||
|
||||
Represents a marker and its segment occuring in a JPEG byte stream.
|
||||
"""
|
||||
|
||||
def __init__(self, marker_code, offset, segment_length):
|
||||
super(_Marker, self).__init__()
|
||||
self._marker_code = marker_code
|
||||
self._offset = offset
|
||||
self._segment_length = segment_length
|
||||
|
||||
@classmethod
|
||||
def from_stream(cls, stream, marker_code, offset):
|
||||
"""Return a generic |_Marker| instance for the marker at `offset` in `stream`
|
||||
having `marker_code`."""
|
||||
if JPEG_MARKER_CODE.is_standalone(marker_code):
|
||||
segment_length = 0
|
||||
else:
|
||||
segment_length = stream.read_short(offset)
|
||||
return cls(marker_code, offset, segment_length)
|
||||
|
||||
@property
|
||||
def marker_code(self):
|
||||
"""The single-byte code that identifies the type of this marker, e.g. ``'\xe0'``
|
||||
for start of image (SOI)."""
|
||||
return self._marker_code
|
||||
|
||||
@property
|
||||
def name(self): # pragma: no cover
|
||||
return JPEG_MARKER_CODE.marker_names[self._marker_code]
|
||||
|
||||
@property
|
||||
def offset(self): # pragma: no cover
|
||||
return self._offset
|
||||
|
||||
@property
|
||||
def segment_length(self):
|
||||
"""The length in bytes of this marker's segment."""
|
||||
return self._segment_length
|
||||
|
||||
|
||||
class _App0Marker(_Marker):
    """Represents a JFIF APP0 marker segment.

    Carries the density units and the horizontal/vertical density values read from
    the segment and converts them to dots-per-inch on request.
    """

    def __init__(self, marker_code, offset, length, density_units, x_density, y_density):
        super(_App0Marker, self).__init__(marker_code, offset, length)
        self._density_units = density_units
        self._x_density = x_density
        self._y_density = y_density

    @property
    def horz_dpi(self):
        """Horizontal dots per inch specified in this marker, defaults to 72 if not
        specified."""
        return self._dpi(self._x_density)

    @property
    def vert_dpi(self):
        """Vertical dots per inch specified in this marker, defaults to 72 if not
        specified."""
        return self._dpi(self._y_density)

    def _dpi(self, density):
        """Return dots per inch corresponding to `density` value.

        Density units of 1 mean `density` is already dots per inch; units of 2 mean
        dots per centimeter, which is converted and rounded to an integer. Any other
        units value yields the default of 72.

        A zero (or missing) `density` also yields 72. A resolution of 0 dpi is not
        meaningful and would make any later `pixels / dpi` size calculation divide
        by zero; the PNG parser applies the same fallback for a zero pHYs value.
        """
        if not density:
            return 72
        if self._density_units == 1:
            return density
        if self._density_units == 2:
            return int(round(density * 2.54))
        return 72

    @classmethod
    def from_stream(cls, stream, marker_code, offset):
        """Return an |_App0Marker| instance for the APP0 marker at `offset` in
        `stream`."""
        # field                off  type   notes
        # ------------------   ---  -----  -------------------
        # segment length         0  short
        # JFIF identifier        2  5 chr  'JFIF\x00'
        # major JPEG version     7  byte   typically 1
        # minor JPEG version     8  byte   typically 1 or 2
        # density units          9  byte   1=inches, 2=cm
        # horz dots per unit    10  short
        # vert dots per unit    12  short
        # ------------------   ---  -----  -------------------
        segment_length = stream.read_short(offset)
        density_units = stream.read_byte(offset, 9)
        x_density = stream.read_short(offset, 10)
        y_density = stream.read_short(offset, 12)
        return cls(marker_code, offset, segment_length, density_units, x_density, y_density)
|
||||
|
||||
|
||||
class _App1Marker(_Marker):
    """Represents a JFIF APP1 (Exif) marker segment."""

    def __init__(self, marker_code, offset, length, horz_dpi, vert_dpi):
        super().__init__(marker_code, offset, length)
        self._horz_dpi = horz_dpi
        self._vert_dpi = vert_dpi

    @classmethod
    def from_stream(cls, stream, marker_code, offset):
        """Return an |_App1Marker| for the APP1 segment at `offset` in `stream`.

        When the segment carries the Exif signature, the resolution is taken from
        the TIFF data embedded in it. An APP1 segment without that signature gets
        72 dpi in both directions.
        """
        # field             off  len  notes
        # ----------------  ---  ---  ------------------------------------
        # segment length      0    2  short
        # Exif identifier     2    6  'Exif\x00\x00'
        # TIFF data           8       starts with byte order 'II' or 'MM'
        # ----------------  ---  ---  ------------------------------------
        seg_len = stream.read_short(offset)
        if cls._is_non_Exif_APP1_segment(stream, offset):
            horz_dpi = vert_dpi = 72
        else:
            tiff = cls._tiff_from_exif_segment(stream, offset, seg_len)
            horz_dpi, vert_dpi = tiff.horz_dpi, tiff.vert_dpi
        return cls(marker_code, offset, seg_len, horz_dpi, vert_dpi)

    @property
    def horz_dpi(self):
        """Horizontal dots per inch held by this marker (72 for non-Exif APP1)."""
        return self._horz_dpi

    @property
    def vert_dpi(self):
        """Vertical dots per inch held by this marker (72 for non-Exif APP1)."""
        return self._vert_dpi

    @classmethod
    def _is_non_Exif_APP1_segment(cls, stream, offset):
        """Return True when the APP1 segment at `offset` in `stream` is NOT Exif.

        The decision is made on the six bytes at offset 2 within the segment, which
        must be 'Exif' followed by two NUL bytes for an Exif segment.
        """
        stream.seek(offset + 2)
        return stream.read(6) != b"Exif\x00\x00"

    @classmethod
    def _tiff_from_exif_segment(cls, stream, offset, segment_length):
        """Return a |Tiff| instance parsed from the Exif APP1 segment of
        `segment_length` at `offset` in `stream`."""
        # the TIFF payload starts 8 bytes into the segment (after length and Exif
        # signature); copy it into its own stream so TIFF offsets start at zero
        stream.seek(offset + 8)
        tiff_bytes = stream.read(segment_length - 8)
        return Tiff.from_stream(io.BytesIO(tiff_bytes))
|
||||
|
||||
|
||||
class _SofMarker(_Marker):
    """Represents a JFIF start of frame (SOFx) marker segment."""

    def __init__(self, marker_code, offset, segment_length, px_width, px_height):
        super().__init__(marker_code, offset, segment_length)
        self._px_width, self._px_height = px_width, px_height

    @classmethod
    def from_stream(cls, stream, marker_code, offset):
        """Return an |_SofMarker| instance for the SOFn marker at `offset` in stream."""
        # field             off  type   notes
        # ----------------  ---  -----  ---------
        # segment length      0  short
        # data precision      2  byte
        # vertical lines      3  short  px_height
        # horizontal lines    5  short  px_width
        # ----------------  ---  -----  ---------
        seg_len = stream.read_short(offset)
        height = stream.read_short(offset, 3)
        width = stream.read_short(offset, 5)
        return cls(marker_code, offset, seg_len, width, height)

    @property
    def px_height(self):
        """Image height in pixels, read from the 'vertical lines' field."""
        return self._px_height

    @property
    def px_width(self):
        """Image width in pixels, read from the 'horizontal lines' field."""
        return self._px_width
|
||||
253
path/to/venv/lib/python3.12/site-packages/docx/image/png.py
Normal file
253
path/to/venv/lib/python3.12/site-packages/docx/image/png.py
Normal file
@@ -0,0 +1,253 @@
|
||||
from .constants import MIME_TYPE, PNG_CHUNK_TYPE
|
||||
from .exceptions import InvalidImageStreamError
|
||||
from .helpers import BIG_ENDIAN, StreamReader
|
||||
from .image import BaseImageHeader
|
||||
|
||||
|
||||
class Png(BaseImageHeader):
    """Image header parser for PNG images."""

    @property
    def content_type(self):
        """MIME content type for this image, always `MIME_TYPE.PNG`."""
        return MIME_TYPE.PNG

    @property
    def default_ext(self):
        """Default filename extension for PNG images, always 'png'."""
        return "png"

    @classmethod
    def from_stream(cls, stream):
        """Return a |Png| instance whose dimensions and resolution are parsed from
        the PNG image in `stream`."""
        parsed = _PngParser.parse(stream)
        return cls(parsed.px_width, parsed.px_height, parsed.horz_dpi, parsed.vert_dpi)
|
||||
|
||||
|
||||
class _PngParser:
|
||||
"""Parses a PNG image stream to extract the image properties found in its chunks."""
|
||||
|
||||
def __init__(self, chunks):
|
||||
super(_PngParser, self).__init__()
|
||||
self._chunks = chunks
|
||||
|
||||
@classmethod
|
||||
def parse(cls, stream):
|
||||
"""Return a |_PngParser| instance containing the header properties parsed from
|
||||
the PNG image in `stream`."""
|
||||
chunks = _Chunks.from_stream(stream)
|
||||
return cls(chunks)
|
||||
|
||||
@property
|
||||
def px_width(self):
|
||||
"""The number of pixels in each row of the image."""
|
||||
IHDR = self._chunks.IHDR
|
||||
return IHDR.px_width
|
||||
|
||||
@property
|
||||
def px_height(self):
|
||||
"""The number of stacked rows of pixels in the image."""
|
||||
IHDR = self._chunks.IHDR
|
||||
return IHDR.px_height
|
||||
|
||||
@property
|
||||
def horz_dpi(self):
|
||||
"""Integer dots per inch for the width of this image.
|
||||
|
||||
Defaults to 72 when not present in the file, as is often the case.
|
||||
"""
|
||||
pHYs = self._chunks.pHYs
|
||||
if pHYs is None:
|
||||
return 72
|
||||
return self._dpi(pHYs.units_specifier, pHYs.horz_px_per_unit)
|
||||
|
||||
@property
|
||||
def vert_dpi(self):
|
||||
"""Integer dots per inch for the height of this image.
|
||||
|
||||
Defaults to 72 when not present in the file, as is often the case.
|
||||
"""
|
||||
pHYs = self._chunks.pHYs
|
||||
if pHYs is None:
|
||||
return 72
|
||||
return self._dpi(pHYs.units_specifier, pHYs.vert_px_per_unit)
|
||||
|
||||
@staticmethod
|
||||
def _dpi(units_specifier, px_per_unit):
|
||||
"""Return dots per inch value calculated from `units_specifier` and
|
||||
`px_per_unit`."""
|
||||
if units_specifier == 1 and px_per_unit:
|
||||
return int(round(px_per_unit * 0.0254))
|
||||
return 72
|
||||
|
||||
|
||||
class _Chunks:
|
||||
"""Collection of the chunks parsed from a PNG image stream."""
|
||||
|
||||
def __init__(self, chunk_iterable):
|
||||
super(_Chunks, self).__init__()
|
||||
self._chunks = list(chunk_iterable)
|
||||
|
||||
@classmethod
|
||||
def from_stream(cls, stream):
|
||||
"""Return a |_Chunks| instance containing the PNG chunks in `stream`."""
|
||||
chunk_parser = _ChunkParser.from_stream(stream)
|
||||
chunks = list(chunk_parser.iter_chunks())
|
||||
return cls(chunks)
|
||||
|
||||
@property
|
||||
def IHDR(self):
|
||||
"""IHDR chunk in PNG image."""
|
||||
match = lambda chunk: chunk.type_name == PNG_CHUNK_TYPE.IHDR # noqa
|
||||
IHDR = self._find_first(match)
|
||||
if IHDR is None:
|
||||
raise InvalidImageStreamError("no IHDR chunk in PNG image")
|
||||
return IHDR
|
||||
|
||||
@property
|
||||
def pHYs(self):
|
||||
"""PHYs chunk in PNG image, or |None| if not present."""
|
||||
match = lambda chunk: chunk.type_name == PNG_CHUNK_TYPE.pHYs # noqa
|
||||
return self._find_first(match)
|
||||
|
||||
def _find_first(self, match):
|
||||
"""Return first chunk in stream order returning True for function `match`."""
|
||||
for chunk in self._chunks:
|
||||
if match(chunk):
|
||||
return chunk
|
||||
return None
|
||||
|
||||
|
||||
class _ChunkParser:
|
||||
"""Extracts chunks from a PNG image stream."""
|
||||
|
||||
def __init__(self, stream_rdr):
|
||||
super(_ChunkParser, self).__init__()
|
||||
self._stream_rdr = stream_rdr
|
||||
|
||||
@classmethod
|
||||
def from_stream(cls, stream):
|
||||
"""Return a |_ChunkParser| instance that can extract the chunks from the PNG
|
||||
image in `stream`."""
|
||||
stream_rdr = StreamReader(stream, BIG_ENDIAN)
|
||||
return cls(stream_rdr)
|
||||
|
||||
def iter_chunks(self):
|
||||
"""Generate a |_Chunk| subclass instance for each chunk in this parser's PNG
|
||||
stream, in the order encountered in the stream."""
|
||||
for chunk_type, offset in self._iter_chunk_offsets():
|
||||
chunk = _ChunkFactory(chunk_type, self._stream_rdr, offset)
|
||||
yield chunk
|
||||
|
||||
def _iter_chunk_offsets(self):
|
||||
"""Generate a (chunk_type, chunk_offset) 2-tuple for each of the chunks in the
|
||||
PNG image stream.
|
||||
|
||||
Iteration stops after the IEND chunk is returned.
|
||||
"""
|
||||
chunk_offset = 8
|
||||
while True:
|
||||
chunk_data_len = self._stream_rdr.read_long(chunk_offset)
|
||||
chunk_type = self._stream_rdr.read_str(4, chunk_offset, 4)
|
||||
data_offset = chunk_offset + 8
|
||||
yield chunk_type, data_offset
|
||||
if chunk_type == "IEND":
|
||||
break
|
||||
# incr offset for chunk len long, chunk type, chunk data, and CRC
|
||||
chunk_offset += 4 + 4 + chunk_data_len + 4
|
||||
|
||||
|
||||
def _ChunkFactory(chunk_type, stream_rdr, offset):
    """Return the chunk object for a chunk of `chunk_type` at `offset` in `stream_rdr`.

    IHDR and pHYs chunks get their dedicated classes; every other chunk type is
    represented by the generic |_Chunk|.
    """
    specialised = {
        PNG_CHUNK_TYPE.IHDR: _IHDRChunk,
        PNG_CHUNK_TYPE.pHYs: _pHYsChunk,
    }
    try:
        chunk_cls = specialised[chunk_type]
    except KeyError:
        chunk_cls = _Chunk
    return chunk_cls.from_offset(chunk_type, stream_rdr, offset)
|
||||
|
||||
|
||||
class _Chunk:
|
||||
"""Base class for specific chunk types.
|
||||
|
||||
Also serves as the default chunk type.
|
||||
"""
|
||||
|
||||
def __init__(self, chunk_type):
|
||||
super(_Chunk, self).__init__()
|
||||
self._chunk_type = chunk_type
|
||||
|
||||
@classmethod
|
||||
def from_offset(cls, chunk_type, stream_rdr, offset):
|
||||
"""Return a default _Chunk instance that only knows its chunk type."""
|
||||
return cls(chunk_type)
|
||||
|
||||
@property
|
||||
def type_name(self):
|
||||
"""The chunk type name, e.g. 'IHDR', 'pHYs', etc."""
|
||||
return self._chunk_type
|
||||
|
||||
|
||||
class _IHDRChunk(_Chunk):
    """IHDR chunk, contains the image dimensions."""

    def __init__(self, chunk_type, px_width, px_height):
        super().__init__(chunk_type)
        self._px_width, self._px_height = px_width, px_height

    @classmethod
    def from_offset(cls, chunk_type, stream_rdr, offset):
        """Return an |_IHDRChunk| with the dimensions read from the chunk data.

        Width is the long at `offset` in `stream_rdr`; height is the long 4 bytes
        after it.
        """
        width = stream_rdr.read_long(offset)
        height = stream_rdr.read_long(offset, 4)
        return cls(chunk_type, width, height)

    @property
    def px_width(self):
        """Image width in pixels as read from the chunk."""
        return self._px_width

    @property
    def px_height(self):
        """Image height in pixels as read from the chunk."""
        return self._px_height
|
||||
|
||||
|
||||
class _pHYsChunk(_Chunk):
    """pHYs chunk, contains the image resolution information."""

    def __init__(self, chunk_type, horz_px_per_unit, vert_px_per_unit, units_specifier):
        super().__init__(chunk_type)
        self._horz_px_per_unit = horz_px_per_unit
        self._vert_px_per_unit = vert_px_per_unit
        self._units_specifier = units_specifier

    @classmethod
    def from_offset(cls, chunk_type, stream_rdr, offset):
        """Return a |_pHYsChunk| with the resolution values read from the chunk data.

        Reads two longs (horizontal, then vertical pixels per unit) followed by a
        single byte (the units specifier), starting at `offset` in `stream_rdr`.
        """
        horz = stream_rdr.read_long(offset)
        vert = stream_rdr.read_long(offset, 4)
        units = stream_rdr.read_byte(offset, 8)
        return cls(chunk_type, horz, vert, units)

    @property
    def horz_px_per_unit(self):
        """Horizontal pixels per unit as read from the chunk."""
        return self._horz_px_per_unit

    @property
    def vert_px_per_unit(self):
        """Vertical pixels per unit as read from the chunk."""
        return self._vert_px_per_unit

    @property
    def units_specifier(self):
        """Units specifier byte as read from the chunk."""
        return self._units_specifier
|
||||
289
path/to/venv/lib/python3.12/site-packages/docx/image/tiff.py
Normal file
289
path/to/venv/lib/python3.12/site-packages/docx/image/tiff.py
Normal file
@@ -0,0 +1,289 @@
|
||||
from .constants import MIME_TYPE, TIFF_FLD, TIFF_TAG
|
||||
from .helpers import BIG_ENDIAN, LITTLE_ENDIAN, StreamReader
|
||||
from .image import BaseImageHeader
|
||||
|
||||
|
||||
class Tiff(BaseImageHeader):
    """Image header parser for TIFF images.

    Works with either byte order; the order is detected from the stream header.
    """

    @property
    def content_type(self):
        """MIME content type for this image, always `MIME_TYPE.TIFF`."""
        return MIME_TYPE.TIFF

    @property
    def default_ext(self):
        """Default filename extension for TIFF images, always 'tiff'."""
        return "tiff"

    @classmethod
    def from_stream(cls, stream):
        """Return a |Tiff| instance whose dimensions and resolution are parsed from
        the TIFF image in `stream`."""
        parsed = _TiffParser.parse(stream)
        return cls(parsed.px_width, parsed.px_height, parsed.horz_dpi, parsed.vert_dpi)
|
||||
|
||||
|
||||
class _TiffParser:
    """Parses a TIFF image stream to extract the image properties found in its main
    image file directory (IFD)."""

    def __init__(self, ifd_entries):
        super().__init__()
        self._ifd_entries = ifd_entries

    @classmethod
    def parse(cls, stream):
        """Return a |_TiffParser| holding the IFD entries of the TIFF image in
        `stream`.

        The offset of the first IFD is the long stored at offset 4 of the stream.
        """
        reader = cls._make_stream_reader(stream)
        first_ifd_offset = reader.read_long(4)
        return cls(_IfdEntries.from_stream(reader, first_ifd_offset))

    @property
    def horz_dpi(self):
        """The horizontal dots per inch value calculated from the XResolution and
        ResolutionUnit tags of the IFD; 72 when XResolution is absent."""
        return self._dpi(TIFF_TAG.X_RESOLUTION)

    @property
    def vert_dpi(self):
        """The vertical dots per inch value calculated from the YResolution and
        ResolutionUnit tags of the IFD; 72 when YResolution is absent."""
        return self._dpi(TIFF_TAG.Y_RESOLUTION)

    @property
    def px_height(self):
        """The number of rows of pixels in the image, |None| when the IFD has no
        ``ImageLength`` tag, the expected case when the TIFF is embedded in an Exif
        image."""
        return self._ifd_entries.get(TIFF_TAG.IMAGE_LENGTH)

    @property
    def px_width(self):
        """The number of pixels in each row of the image, |None| when the IFD has
        no ``ImageWidth`` tag, the expected case when the TIFF is embedded in an
        Exif image."""
        return self._ifd_entries.get(TIFF_TAG.IMAGE_WIDTH)

    @classmethod
    def _detect_endian(cls, stream):
        """Return BIG_ENDIAN when the first two bytes of `stream` are 'MM',
        LITTLE_ENDIAN for anything else (normally 'II')."""
        stream.seek(0)
        if stream.read(2) == b"MM":
            return BIG_ENDIAN
        return LITTLE_ENDIAN

    def _dpi(self, resolution_tag):
        """Return the dpi value for `resolution_tag`, either TIFF_TAG.X_RESOLUTION
        or TIFF_TAG.Y_RESOLUTION.

        The result combines the value of that tag with the TIFF_TAG.RESOLUTION_UNIT
        tag. 72 is returned when the resolution tag is absent or when the unit is 1
        (aspect ratio only).
        """
        entries = self._ifd_entries
        if resolution_tag not in entries:
            return 72

        # a missing unit tag is treated as inches (2)
        unit = entries.get(TIFF_TAG.RESOLUTION_UNIT, 2)
        if unit == 1:
            return 72

        # unit 2 is inches; any other value is treated as centimeters
        scale = 1 if unit == 2 else 2.54
        return int(round(entries[resolution_tag] * scale))

    @classmethod
    def _make_stream_reader(cls, stream):
        """Return a |StreamReader| wrapping `stream`, using the byte order indicated
        by the 'MM' or 'II' marker in the TIFF stream header."""
        return StreamReader(stream, cls._detect_endian(stream))
|
||||
|
||||
|
||||
class _IfdEntries:
|
||||
"""Image File Directory for a TIFF image, having mapping (dict) semantics allowing
|
||||
"tag" values to be retrieved by tag code."""
|
||||
|
||||
def __init__(self, entries):
|
||||
super(_IfdEntries, self).__init__()
|
||||
self._entries = entries
|
||||
|
||||
def __contains__(self, key):
|
||||
"""Provides ``in`` operator, e.g. ``tag in ifd_entries``"""
|
||||
return self._entries.__contains__(key)
|
||||
|
||||
def __getitem__(self, key):
|
||||
"""Provides indexed access, e.g. ``tag_value = ifd_entries[tag_code]``"""
|
||||
return self._entries.__getitem__(key)
|
||||
|
||||
@classmethod
|
||||
def from_stream(cls, stream, offset):
|
||||
"""Return a new |_IfdEntries| instance parsed from `stream` starting at
|
||||
`offset`."""
|
||||
ifd_parser = _IfdParser(stream, offset)
|
||||
entries = {e.tag: e.value for e in ifd_parser.iter_entries()}
|
||||
return cls(entries)
|
||||
|
||||
def get(self, tag_code, default=None):
|
||||
"""Return value of IFD entry having tag matching `tag_code`, or `default` if no
|
||||
matching tag found."""
|
||||
return self._entries.get(tag_code, default)
|
||||
|
||||
|
||||
class _IfdParser:
|
||||
"""Service object that knows how to extract directory entries from an Image File
|
||||
Directory (IFD)"""
|
||||
|
||||
def __init__(self, stream_rdr, offset):
|
||||
super(_IfdParser, self).__init__()
|
||||
self._stream_rdr = stream_rdr
|
||||
self._offset = offset
|
||||
|
||||
def iter_entries(self):
|
||||
"""Generate an |_IfdEntry| instance corresponding to each entry in the
|
||||
directory."""
|
||||
for idx in range(self._entry_count):
|
||||
dir_entry_offset = self._offset + 2 + (idx * 12)
|
||||
ifd_entry = _IfdEntryFactory(self._stream_rdr, dir_entry_offset)
|
||||
yield ifd_entry
|
||||
|
||||
@property
|
||||
def _entry_count(self):
|
||||
"""The count of directory entries, read from the top of the IFD header."""
|
||||
return self._stream_rdr.read_short(self._offset)
|
||||
|
||||
|
||||
def _IfdEntryFactory(stream_rdr, offset):
    """Return an |_IfdEntry| subclass instance for the directory entry at `offset`
    in `stream_rdr`.

    The class is chosen by the field-type code read as a short at byte 2 of the
    entry. A field type with no dedicated class falls back to |_IfdEntry| itself.
    """
    entry_cls_by_field_type = {
        TIFF_FLD.ASCII: _AsciiIfdEntry,
        TIFF_FLD.SHORT: _ShortIfdEntry,
        TIFF_FLD.LONG: _LongIfdEntry,
        TIFF_FLD.RATIONAL: _RationalIfdEntry,
    }
    field_type_code = stream_rdr.read_short(offset, 2)
    entry_cls = entry_cls_by_field_type.get(field_type_code, _IfdEntry)
    return entry_cls.from_stream(stream_rdr, offset)
|
||||
|
||||
|
||||
class _IfdEntry:
|
||||
"""Base class for IFD entry classes.
|
||||
|
||||
Subclasses are differentiated by value type, e.g. ASCII, long int, etc.
|
||||
"""
|
||||
|
||||
def __init__(self, tag_code, value):
|
||||
super(_IfdEntry, self).__init__()
|
||||
self._tag_code = tag_code
|
||||
self._value = value
|
||||
|
||||
@classmethod
|
||||
def from_stream(cls, stream_rdr, offset):
|
||||
"""Return an |_IfdEntry| subclass instance containing the tag and value of the
|
||||
tag parsed from `stream_rdr` at `offset`.
|
||||
|
||||
Note this method is common to all subclasses. Override the ``_parse_value()``
|
||||
method to provide distinctive behavior based on field type.
|
||||
"""
|
||||
tag_code = stream_rdr.read_short(offset, 0)
|
||||
value_count = stream_rdr.read_long(offset, 4)
|
||||
value_offset = stream_rdr.read_long(offset, 8)
|
||||
value = cls._parse_value(stream_rdr, offset, value_count, value_offset)
|
||||
return cls(tag_code, value)
|
||||
|
||||
@classmethod
|
||||
def _parse_value(cls, stream_rdr, offset, value_count, value_offset):
|
||||
"""Return the value of this field parsed from `stream_rdr` at `offset`.
|
||||
|
||||
Intended to be overridden by subclasses.
|
||||
"""
|
||||
return "UNIMPLEMENTED FIELD TYPE" # pragma: no cover
|
||||
|
||||
@property
|
||||
def tag(self):
|
||||
"""Short int code that identifies this IFD entry."""
|
||||
return self._tag_code
|
||||
|
||||
@property
|
||||
def value(self):
|
||||
"""Value of this tag, its type being dependent on the tag."""
|
||||
return self._value
|
||||
|
||||
|
||||
class _AsciiIfdEntry(_IfdEntry):
    """IFD entry whose value is a NULL-terminated ASCII string."""

    @classmethod
    def _parse_value(cls, stream_rdr, offset, value_count, value_offset):
        """Return the ASCII string parsed from `stream_rdr` at `value_offset`.

        `value_count` is the length of the string including its terminating
        '\x00' (NUL) character, so one character fewer than that is read.
        """
        char_count = value_count - 1
        return stream_rdr.read_str(char_count, value_offset)
|
||||
|
||||
|
||||
class _ShortIfdEntry(_IfdEntry):
    """IFD entry expressed as a short (2-byte) integer."""

    @classmethod
    def _parse_value(cls, stream_rdr, offset, value_count, value_offset):
        """Return the short int stored in the `value_offset` field of this entry.

        The value is read as a short at byte 8 of the entry. Only a single value
        is supported; any other `value_count` produces a placeholder string.
        """
        if value_count != 1:  # pragma: no cover
            return "Multi-value short integer NOT IMPLEMENTED"
        return stream_rdr.read_short(offset, 8)
|
||||
|
||||
|
||||
class _LongIfdEntry(_IfdEntry):
    """IFD entry expressed as a long (4-byte) integer."""

    @classmethod
    def _parse_value(cls, stream_rdr, offset, value_count, value_offset):
        """Return the long int stored in the `value_offset` field of this entry.

        The value is read as a long at byte 8 of the entry. Only a single value
        is supported; any other `value_count` produces a placeholder string.
        """
        if value_count != 1:  # pragma: no cover
            return "Multi-value long integer NOT IMPLEMENTED"
        return stream_rdr.read_long(offset, 8)
|
||||
|
||||
|
||||
class _RationalIfdEntry(_IfdEntry):
    """IFD entry expressed as a numerator, denominator pair."""

    @classmethod
    def _parse_value(cls, stream_rdr, offset, value_count, value_offset):
        """Return the rational value at `value_offset` in `stream_rdr` as a float.

        The numerator is the long at `value_offset` and the denominator is the
        long 4 bytes after it. Only a single value is supported; any other
        `value_count` produces a placeholder string.

        Raises |ZeroDivisionError| when the stored denominator is zero.
        """
        if value_count != 1:  # pragma: no cover
            return "Multi-value Rational NOT IMPLEMENTED"
        dividend = stream_rdr.read_long(value_offset)
        divisor = stream_rdr.read_long(value_offset, 4)
        return dividend / divisor
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
306
path/to/venv/lib/python3.12/site-packages/docx/opc/constants.py
Normal file
306
path/to/venv/lib/python3.12/site-packages/docx/opc/constants.py
Normal file
@@ -0,0 +1,306 @@
|
||||
"""Constant values related to the Open Packaging Convention.
|
||||
|
||||
In particular it includes content types and relationship types.
|
||||
"""
|
||||
|
||||
|
||||
class CONTENT_TYPE:
|
||||
"""Content type URIs (like MIME-types) that specify a part's format."""
|
||||
|
||||
BMP = "image/bmp"
|
||||
DML_CHART = "application/vnd.openxmlformats-officedocument.drawingml.chart+xml"
|
||||
DML_CHARTSHAPES = "application/vnd.openxmlformats-officedocument.drawingml.chartshapes+xml"
|
||||
DML_DIAGRAM_COLORS = "application/vnd.openxmlformats-officedocument.drawingml.diagramColors+xml"
|
||||
DML_DIAGRAM_DATA = "application/vnd.openxmlformats-officedocument.drawingml.diagramData+xml"
|
||||
DML_DIAGRAM_LAYOUT = "application/vnd.openxmlformats-officedocument.drawingml.diagramLayout+xml"
|
||||
DML_DIAGRAM_STYLE = "application/vnd.openxmlformats-officedocument.drawingml.diagramStyle+xml"
|
||||
GIF = "image/gif"
|
||||
JPEG = "image/jpeg"
|
||||
MS_PHOTO = "image/vnd.ms-photo"
|
||||
OFC_CUSTOM_PROPERTIES = "application/vnd.openxmlformats-officedocument.custom-properties+xml"
|
||||
OFC_CUSTOM_XML_PROPERTIES = (
|
||||
"application/vnd.openxmlformats-officedocument.customXmlProperties+xml"
|
||||
)
|
||||
OFC_DRAWING = "application/vnd.openxmlformats-officedocument.drawing+xml"
|
||||
OFC_EXTENDED_PROPERTIES = (
|
||||
"application/vnd.openxmlformats-officedocument.extended-properties+xml"
|
||||
)
|
||||
OFC_OLE_OBJECT = "application/vnd.openxmlformats-officedocument.oleObject"
|
||||
OFC_PACKAGE = "application/vnd.openxmlformats-officedocument.package"
|
||||
OFC_THEME = "application/vnd.openxmlformats-officedocument.theme+xml"
|
||||
OFC_THEME_OVERRIDE = "application/vnd.openxmlformats-officedocument.themeOverride+xml"
|
||||
OFC_VML_DRAWING = "application/vnd.openxmlformats-officedocument.vmlDrawing"
|
||||
OPC_CORE_PROPERTIES = "application/vnd.openxmlformats-package.core-properties+xml"
|
||||
OPC_DIGITAL_SIGNATURE_CERTIFICATE = (
|
||||
"application/vnd.openxmlformats-package.digital-signature-certificate"
|
||||
)
|
||||
OPC_DIGITAL_SIGNATURE_ORIGIN = "application/vnd.openxmlformats-package.digital-signature-origin"
|
||||
OPC_DIGITAL_SIGNATURE_XMLSIGNATURE = (
|
||||
"application/vnd.openxmlformats-package.digital-signature-xmlsignature+xml"
|
||||
)
|
||||
OPC_RELATIONSHIPS = "application/vnd.openxmlformats-package.relationships+xml"
|
||||
PML_COMMENTS = "application/vnd.openxmlformats-officedocument.presentationml.comments+xml"
|
||||
PML_COMMENT_AUTHORS = (
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.commentAuthors+xml"
|
||||
)
|
||||
PML_HANDOUT_MASTER = (
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.handoutMaster+xml"
|
||||
)
|
||||
PML_NOTES_MASTER = (
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.notesMaster+xml"
|
||||
)
|
||||
PML_NOTES_SLIDE = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml"
|
||||
PML_PRESENTATION_MAIN = (
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml"
|
||||
)
|
||||
PML_PRES_PROPS = "application/vnd.openxmlformats-officedocument.presentationml.presProps+xml"
|
||||
PML_PRINTER_SETTINGS = (
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.printerSettings"
|
||||
)
|
||||
PML_SLIDE = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml"
|
||||
PML_SLIDESHOW_MAIN = (
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml"
|
||||
)
|
||||
PML_SLIDE_LAYOUT = (
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.slideLayout+xml"
|
||||
)
|
||||
PML_SLIDE_MASTER = (
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.slideMaster+xml"
|
||||
)
|
||||
PML_SLIDE_UPDATE_INFO = (
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.slideUpdateInfo+xml"
|
||||
)
|
||||
PML_TABLE_STYLES = (
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.tableStyles+xml"
|
||||
)
|
||||
PML_TAGS = "application/vnd.openxmlformats-officedocument.presentationml.tags+xml"
|
||||
PML_TEMPLATE_MAIN = (
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.template.main+xml"
|
||||
)
|
||||
PML_VIEW_PROPS = "application/vnd.openxmlformats-officedocument.presentationml.viewProps+xml"
|
||||
PNG = "image/png"
|
||||
SML_CALC_CHAIN = "application/vnd.openxmlformats-officedocument.spreadsheetml.calcChain+xml"
|
||||
SML_CHARTSHEET = "application/vnd.openxmlformats-officedocument.spreadsheetml.chartsheet+xml"
|
||||
SML_COMMENTS = "application/vnd.openxmlformats-officedocument.spreadsheetml.comments+xml"
|
||||
SML_CONNECTIONS = "application/vnd.openxmlformats-officedocument.spreadsheetml.connections+xml"
|
||||
SML_CUSTOM_PROPERTY = (
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.customProperty"
|
||||
)
|
||||
SML_DIALOGSHEET = "application/vnd.openxmlformats-officedocument.spreadsheetml.dialogsheet+xml"
|
||||
SML_EXTERNAL_LINK = (
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.externalLink+xml"
|
||||
)
|
||||
SML_PIVOT_CACHE_DEFINITION = (
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCacheDefinition+xml"
|
||||
)
|
||||
SML_PIVOT_CACHE_RECORDS = (
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCacheRecords+xml"
|
||||
)
|
||||
SML_PIVOT_TABLE = "application/vnd.openxmlformats-officedocument.spreadsheetml.pivotTable+xml"
|
||||
SML_PRINTER_SETTINGS = (
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.printerSettings"
|
||||
)
|
||||
SML_QUERY_TABLE = "application/vnd.openxmlformats-officedocument.spreadsheetml.queryTable+xml"
|
||||
SML_REVISION_HEADERS = (
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.revisionHeaders+xml"
|
||||
)
|
||||
SML_REVISION_LOG = "application/vnd.openxmlformats-officedocument.spreadsheetml.revisionLog+xml"
|
||||
SML_SHARED_STRINGS = (
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml"
|
||||
)
|
||||
SML_SHEET = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||
SML_SHEET_MAIN = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"
|
||||
SML_SHEET_METADATA = (
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheetMetadata+xml"
|
||||
)
|
||||
SML_STYLES = "application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"
|
||||
SML_TABLE = "application/vnd.openxmlformats-officedocument.spreadsheetml.table+xml"
|
||||
SML_TABLE_SINGLE_CELLS = (
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.tableSingleCells+xml"
|
||||
)
|
||||
SML_TEMPLATE_MAIN = (
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml"
|
||||
)
|
||||
SML_USER_NAMES = "application/vnd.openxmlformats-officedocument.spreadsheetml.userNames+xml"
|
||||
SML_VOLATILE_DEPENDENCIES = (
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.volatileDependencies+xml"
|
||||
)
|
||||
SML_WORKSHEET = "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"
|
||||
TIFF = "image/tiff"
|
||||
WML_COMMENTS = "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"
|
||||
WML_DOCUMENT = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
WML_DOCUMENT_GLOSSARY = (
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document.glossary+xml"
|
||||
)
|
||||
WML_DOCUMENT_MAIN = (
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"
|
||||
)
|
||||
WML_ENDNOTES = "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml"
|
||||
WML_FONT_TABLE = "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml"
|
||||
WML_FOOTER = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"
|
||||
WML_FOOTNOTES = "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml"
|
||||
WML_HEADER = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"
|
||||
WML_NUMBERING = "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"
|
||||
WML_PRINTER_SETTINGS = (
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.printerSettings"
|
||||
)
|
||||
WML_SETTINGS = "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml"
|
||||
WML_STYLES = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"
|
||||
WML_WEB_SETTINGS = (
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml"
|
||||
)
|
||||
XML = "application/xml"
|
||||
X_EMF = "image/x-emf"
|
||||
X_FONTDATA = "application/x-fontdata"
|
||||
X_FONT_TTF = "application/x-font-ttf"
|
||||
X_WMF = "image/x-wmf"
|
||||
|
||||
|
||||
class NAMESPACE:
    """XML namespace URIs used by OPC packages and their parts."""

    DML_WORDPROCESSING_DRAWING = (
        "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
    )
    OFC_RELATIONSHIPS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    OPC_RELATIONSHIPS = "http://schemas.openxmlformats.org/package/2006/relationships"
    OPC_CONTENT_TYPES = "http://schemas.openxmlformats.org/package/2006/content-types"
    WML_MAIN = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
|
||||
|
||||
|
||||
class RELATIONSHIP_TARGET_MODE:
    """Values a relationship's target mode can take in Open XML."""

    EXTERNAL = "External"
    INTERNAL = "Internal"
|
||||
|
||||
|
||||
class RELATIONSHIP_TYPE:
|
||||
AUDIO = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/audio"
|
||||
A_F_CHUNK = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/aFChunk"
|
||||
CALC_CHAIN = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/calcChain"
|
||||
CERTIFICATE = (
|
||||
"http://schemas.openxmlformats.org/package/2006/relationships/digital-signature/certificate"
|
||||
)
|
||||
CHART = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/chart"
|
||||
CHARTSHEET = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/chartsheet"
|
||||
CHART_USER_SHAPES = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/chartUserShapes"
|
||||
)
|
||||
COMMENTS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments"
|
||||
COMMENT_AUTHORS = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/commentAuthors"
|
||||
)
|
||||
CONNECTIONS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/connections"
|
||||
CONTROL = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/control"
|
||||
CORE_PROPERTIES = (
|
||||
"http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"
|
||||
)
|
||||
CUSTOM_PROPERTIES = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"
|
||||
)
|
||||
CUSTOM_PROPERTY = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/customProperty"
|
||||
)
|
||||
CUSTOM_XML = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/customXml"
|
||||
CUSTOM_XML_PROPS = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/customXmlProps"
|
||||
)
|
||||
DIAGRAM_COLORS = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/diagramColors"
|
||||
)
|
||||
DIAGRAM_DATA = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/diagramData"
|
||||
DIAGRAM_LAYOUT = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/diagramLayout"
|
||||
)
|
||||
DIAGRAM_QUICK_STYLE = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/diagramQuickStyle"
|
||||
)
|
||||
DIALOGSHEET = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/dialogsheet"
|
||||
DRAWING = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/drawing"
|
||||
ENDNOTES = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes"
|
||||
EXTENDED_PROPERTIES = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties"
|
||||
)
|
||||
EXTERNAL_LINK = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/externalLink"
|
||||
)
|
||||
FONT = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/font"
|
||||
FONT_TABLE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable"
|
||||
FOOTER = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer"
|
||||
FOOTNOTES = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes"
|
||||
GLOSSARY_DOCUMENT = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/glossaryDocument"
|
||||
)
|
||||
HANDOUT_MASTER = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/handoutMaster"
|
||||
)
|
||||
HEADER = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header"
|
||||
HYPERLINK = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
|
||||
IMAGE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"
|
||||
NOTES_MASTER = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster"
|
||||
NOTES_SLIDE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide"
|
||||
NUMBERING = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering"
|
||||
OFFICE_DOCUMENT = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"
|
||||
)
|
||||
OLE_OBJECT = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject"
|
||||
ORIGIN = "http://schemas.openxmlformats.org/package/2006/relationships/digital-signature/origin"
|
||||
PACKAGE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/package"
|
||||
PIVOT_CACHE_DEFINITION = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotCacheDefinition"
|
||||
)
|
||||
PIVOT_CACHE_RECORDS = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships"
|
||||
"/spreadsheetml/pivotCacheRecords"
|
||||
)
|
||||
PIVOT_TABLE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotTable"
|
||||
PRES_PROPS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/presProps"
|
||||
PRINTER_SETTINGS = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/printerSettings"
|
||||
)
|
||||
QUERY_TABLE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/queryTable"
|
||||
REVISION_HEADERS = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/revisionHeaders"
|
||||
)
|
||||
REVISION_LOG = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/revisionLog"
|
||||
SETTINGS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings"
|
||||
SHARED_STRINGS = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings"
|
||||
)
|
||||
SHEET_METADATA = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/sheetMetadata"
|
||||
)
|
||||
SIGNATURE = (
|
||||
"http://schemas.openxmlformats.org/package/2006/relationships/digital-signature/signature"
|
||||
)
|
||||
SLIDE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide"
|
||||
SLIDE_LAYOUT = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout"
|
||||
SLIDE_MASTER = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideMaster"
|
||||
SLIDE_UPDATE_INFO = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideUpdateInfo"
|
||||
)
|
||||
STYLES = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"
|
||||
TABLE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/table"
|
||||
TABLE_SINGLE_CELLS = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/tableSingleCells"
|
||||
)
|
||||
TABLE_STYLES = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/tableStyles"
|
||||
TAGS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/tags"
|
||||
THEME = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme"
|
||||
THEME_OVERRIDE = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/themeOverride"
|
||||
)
|
||||
THUMBNAIL = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/thumbnail"
|
||||
USERNAMES = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/usernames"
|
||||
VIDEO = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/video"
|
||||
VIEW_PROPS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/viewProps"
|
||||
VML_DRAWING = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/vmlDrawing"
|
||||
VOLATILE_DEPENDENCIES = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/volatileDependencies"
|
||||
)
|
||||
WEB_SETTINGS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings"
|
||||
WORKSHEET_SOURCE = (
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheetSource"
|
||||
)
|
||||
XML_MAPS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/xmlMaps"
|
||||
142
path/to/venv/lib/python3.12/site-packages/docx/opc/coreprops.py
Normal file
142
path/to/venv/lib/python3.12/site-packages/docx/opc/coreprops.py
Normal file
@@ -0,0 +1,142 @@
|
||||
"""Provides CoreProperties, Dublin-Core attributes of the document.
|
||||
|
||||
These are broadly-standardized attributes like author, last-modified, etc.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from docx.oxml.coreprops import CT_CoreProperties
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.coreprops import CT_CoreProperties
|
||||
|
||||
|
||||
class CoreProperties:
    """Corresponds to part named ``/docProps/core.xml``, containing the core document
    properties for this document package.

    Every property is read/write and delegates directly to the like-named
    attribute of the wrapped `CT_CoreProperties` element.
    """

    def __init__(self, element: CT_CoreProperties):
        self._element = element

    # -- text-valued properties, backed by `*_text` element attributes ------------

    @property
    def author(self):
        """Value of `author_text` on the underlying element."""
        return self._element.author_text

    @author.setter
    def author(self, value: str):
        self._element.author_text = value

    @property
    def category(self):
        """Value of `category_text` on the underlying element."""
        return self._element.category_text

    @category.setter
    def category(self, value: str):
        self._element.category_text = value

    @property
    def comments(self):
        """Value of `comments_text` on the underlying element."""
        return self._element.comments_text

    @comments.setter
    def comments(self, value: str):
        self._element.comments_text = value

    @property
    def content_status(self):
        """Value of `contentStatus_text` on the underlying element."""
        return self._element.contentStatus_text

    @content_status.setter
    def content_status(self, value: str):
        self._element.contentStatus_text = value

    @property
    def created(self):
        """Value of `created_datetime` on the underlying element."""
        return self._element.created_datetime

    @created.setter
    def created(self, value: dt.datetime):
        self._element.created_datetime = value

    @property
    def identifier(self):
        """Value of `identifier_text` on the underlying element."""
        return self._element.identifier_text

    @identifier.setter
    def identifier(self, value: str):
        self._element.identifier_text = value

    @property
    def keywords(self):
        """Value of `keywords_text` on the underlying element."""
        return self._element.keywords_text

    @keywords.setter
    def keywords(self, value: str):
        self._element.keywords_text = value

    @property
    def language(self):
        """Value of `language_text` on the underlying element."""
        return self._element.language_text

    @language.setter
    def language(self, value: str):
        self._element.language_text = value

    @property
    def last_modified_by(self):
        """Value of `lastModifiedBy_text` on the underlying element."""
        return self._element.lastModifiedBy_text

    @last_modified_by.setter
    def last_modified_by(self, value: str):
        self._element.lastModifiedBy_text = value

    @property
    def last_printed(self):
        """Value of `lastPrinted_datetime` on the underlying element."""
        return self._element.lastPrinted_datetime

    @last_printed.setter
    def last_printed(self, value: dt.datetime):
        self._element.lastPrinted_datetime = value

    @property
    def modified(self):
        """Value of `modified_datetime` on the underlying element."""
        return self._element.modified_datetime

    @modified.setter
    def modified(self, value: dt.datetime):
        self._element.modified_datetime = value

    @property
    def revision(self):
        """Value of `revision_number` on the underlying element."""
        return self._element.revision_number

    @revision.setter
    def revision(self, value: int):
        self._element.revision_number = value

    @property
    def subject(self):
        """Value of `subject_text` on the underlying element."""
        return self._element.subject_text

    @subject.setter
    def subject(self, value: str):
        self._element.subject_text = value

    @property
    def title(self):
        """Value of `title_text` on the underlying element."""
        return self._element.title_text

    @title.setter
    def title(self, value: str):
        self._element.title_text = value

    @property
    def version(self):
        """Value of `version_text` on the underlying element."""
        return self._element.version_text

    @version.setter
    def version(self, value: str):
        self._element.version_text = value
|
||||
@@ -0,0 +1,12 @@
|
||||
"""Exceptions specific to python-opc.
|
||||
|
||||
The base exception class is OpcError.
|
||||
"""
|
||||
|
||||
|
||||
class OpcError(Exception):
    """Root of the python-opc exception hierarchy."""
|
||||
|
||||
|
||||
class PackageNotFoundError(OpcError):
    """Raised when no package can be found at the specified path."""
|
||||
247
path/to/venv/lib/python3.12/site-packages/docx/opc/oxml.py
Normal file
247
path/to/venv/lib/python3.12/site-packages/docx/opc/oxml.py
Normal file
@@ -0,0 +1,247 @@
|
||||
# pyright: reportPrivateUsage=false
|
||||
|
||||
"""Temporary stand-in for main oxml module.
|
||||
|
||||
This module came across with the PackageReader transplant. Probably much will get
|
||||
replaced with objects from the pptx.oxml.core and then this module will either get
|
||||
deleted or only hold the package related custom element classes.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import cast
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from docx.opc.constants import NAMESPACE as NS
|
||||
from docx.opc.constants import RELATIONSHIP_TARGET_MODE as RTM
|
||||
|
||||
# -- configure the XML parser used for package-level XML --
# `element_class_lookup` is the namespace-based lookup through which custom
# element classes can be associated with the parser.
element_class_lookup = etree.ElementNamespaceClassLookup()
oxml_parser = etree.XMLParser(
    remove_blank_text=True,
    resolve_entities=False,
)
oxml_parser.set_element_class_lookup(element_class_lookup)

# -- namespace prefixes understood by `qn()`, mapped to their namespace URIs --
nsmap = {
    "ct": NS.OPC_CONTENT_TYPES,
    "pr": NS.OPC_RELATIONSHIPS,
    "r": NS.OFC_RELATIONSHIPS,
}
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# functions
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
def parse_xml(text: str) -> etree._Element:
    """Return the root element parsed from `text` using the module's `oxml_parser`.

    A replacement for a bare `etree.fromstring()` call, so parsing always goes
    through the parser configured in this module.
    """
    root = etree.fromstring(text, parser=oxml_parser)
    return root
|
||||
|
||||
|
||||
def qn(tag: str) -> str:
    """Return the Clark-notation qualified name for namespace-prefixed `tag`.

    "qn" stands for "qualified name". The prefix before the colon is looked up in
    the module-level `nsmap`; for example, ``qn('pr:Relationship')`` returns
    ``'{http://schemas.../relationships}Relationship'``.

    Raises |KeyError| when the prefix is not in `nsmap` and |ValueError| when
    `tag` does not contain exactly one colon.
    """
    prefix, local_name = tag.split(":")
    namespace_uri = nsmap[prefix]
    return f"{{{namespace_uri}}}{local_name}"
|
||||
|
||||
|
||||
def serialize_part_xml(part_elm: etree._Element) -> bytes:
    """Return `part_elm` serialized to bytes suitable for storage as an XML part.

    No whitespace is added for readability. The output is UTF-8 encoded and
    serialized with `standalone=True`.
    """
    xml_bytes = etree.tostring(part_elm, encoding="UTF-8", standalone=True)
    return xml_bytes
|
||||
|
||||
|
||||
def serialize_for_reading(element: etree._Element) -> str:
    """Return `element` as pretty-printed XML text, suitable for use in tests.

    The result is a `str` and carries no XML declaration.
    """
    readable_xml = etree.tostring(element, encoding="unicode", pretty_print=True)
    return readable_xml
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Custom element classes
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class BaseOxmlElement(etree.ElementBase):
    """Common base for the custom element classes in this module, so behavior
    shared by all of them lives in one place."""

    @property
    def xml(self) -> str:
        """XML text for this element, intended for testing purposes.

        Produced by `serialize_for_reading()`: pretty-printed and without an XML
        declaration.
        """
        return serialize_for_reading(self)
|
||||
|
||||
|
||||
class CT_Default(BaseOxmlElement):
    """`<Default>` element that appears in `[Content_Types].xml` part.

    Specifies the default content type applied to any part with the given
    extension.
    """

    @property
    def content_type(self):
        """Value of the ``ContentType`` attribute of this ``<Default>`` element."""
        return self.get("ContentType")

    @property
    def extension(self):
        """Value of the ``Extension`` attribute of this ``<Default>`` element."""
        return self.get("Extension")

    @staticmethod
    def new(ext: str, content_type: str):
        """Return a new ``<Default>`` element in the content-types namespace.

        Its ``Extension`` attribute is set to `ext` and its ``ContentType``
        attribute to `content_type`.
        """
        ns_uri = nsmap["ct"]
        default = parse_xml(f'<Default xmlns="{ns_uri}"/>')
        for attr_name, attr_value in (("Extension", ext), ("ContentType", content_type)):
            default.set(attr_name, attr_value)
        return default
|
||||
|
||||
|
||||
class CT_Override(BaseOxmlElement):
    """``<Override>`` element, specifying the content type applied to the part with
    the given partname."""

    @property
    def content_type(self):
        """Value of the ``ContentType`` attribute of this ``<Override>`` element."""
        return self.get("ContentType")

    @staticmethod
    def new(partname, content_type):
        """Return a new ``<Override>`` element in the content-types namespace.

        Its ``PartName`` attribute is set to `partname` and its ``ContentType``
        attribute to `content_type`.
        """
        ns_uri = nsmap["ct"]
        override = parse_xml(f'<Override xmlns="{ns_uri}"/>')
        for attr_name, attr_value in (("PartName", partname), ("ContentType", content_type)):
            override.set(attr_name, attr_value)
        return override

    @property
    def partname(self):
        """Value of the ``PartName`` attribute of this ``<Override>`` element."""
        return self.get("PartName")
|
||||
|
||||
|
||||
class CT_Relationship(BaseOxmlElement):
    """`<Relationship>` element, one relationship from a source to a target part."""

    @staticmethod
    def new(rId: str, reltype: str, target: str, target_mode: str = RTM.INTERNAL):
        """Create a ``<Relationship>`` element from the arguments.

        `rId`, `reltype` and `target` are stored in the ``Id``, ``Type`` and ``Target``
        attributes respectively. The ``TargetMode`` attribute is only written when
        `target_mode` is ``RTM.EXTERNAL``; otherwise it is left off the element.
        """
        element = parse_xml('<Relationship xmlns="%s"/>' % nsmap["pr"])
        for attr_name, attr_value in (("Id", rId), ("Type", reltype), ("Target", target)):
            element.set(attr_name, attr_value)
        if target_mode == RTM.EXTERNAL:
            element.set("TargetMode", RTM.EXTERNAL)
        return element

    @property
    def rId(self):
        """Value of the ``Id`` attribute of this ``<Relationship>`` element."""
        return self.get("Id")

    @property
    def reltype(self):
        """Value of the ``Type`` attribute of this ``<Relationship>`` element."""
        return self.get("Type")

    @property
    def target_mode(self):
        """Value of the ``TargetMode`` attribute of this ``<Relationship>`` element.

        Either ``Internal`` or ``External``; ``RTM.INTERNAL`` is used as the fallback when the
        attribute is not present.
        """
        return self.get("TargetMode", RTM.INTERNAL)

    @property
    def target_ref(self):
        """Value of the ``Target`` attribute of this ``<Relationship>`` element."""
        return self.get("Target")
|
||||
|
||||
|
||||
class CT_Relationships(BaseOxmlElement):
    """``<Relationships>`` element, the root element of a .rels file."""

    @staticmethod
    def new() -> CT_Relationships:
        """Create a new, empty ``<Relationships>`` element."""
        root = parse_xml('<Relationships xmlns="%s"/>' % nsmap["pr"])
        return cast(CT_Relationships, root)

    def add_rel(self, rId: str, reltype: str, target: str, is_external: bool = False):
        """Append a child ``<Relationship>`` element built from the arguments.

        The child's target mode is ``RTM.EXTERNAL`` when `is_external` is true and
        ``RTM.INTERNAL`` otherwise.
        """
        if is_external:
            target_mode = RTM.EXTERNAL
        else:
            target_mode = RTM.INTERNAL
        self.append(CT_Relationship.new(rId, reltype, target, target_mode))

    @property
    def Relationship_lst(self):
        """List of all the ``<Relationship>`` child elements."""
        return self.findall(qn("pr:Relationship"))

    @property
    def xml(self):
        """XML for this element as produced by `serialize_part_xml()`.

        Suitable for saving in a .rels stream: not pretty printed and with an XML declaration
        at the top.
        """
        return serialize_part_xml(self)
|
||||
|
||||
|
||||
class CT_Types(BaseOxmlElement):
    """``<Types>`` element, the container for the Default and Override elements in
    [Content_Types].xml."""

    @staticmethod
    def new():
        """Create a new, empty ``<Types>`` element."""
        return parse_xml('<Types xmlns="%s"/>' % nsmap["ct"])

    def add_default(self, ext, content_type):
        """Append a child ``<Default>`` element built from `ext` and `content_type`."""
        self.append(CT_Default.new(ext, content_type))

    def add_override(self, partname, content_type):
        """Append a child ``<Override>`` element built from `partname` and
        `content_type`."""
        self.append(CT_Override.new(partname, content_type))

    @property
    def defaults(self):
        """Result of `findall()` for the ``ct:Default`` children of this element."""
        return self.findall(qn("ct:Default"))

    @property
    def overrides(self):
        """Result of `findall()` for the ``ct:Override`` children of this element."""
        return self.findall(qn("ct:Override"))
|
||||
|
||||
|
||||
# -- custom classes for the elements of `[Content_Types].xml` --
ct_namespace = element_class_lookup.get_namespace(nsmap["ct"])
for _tag, _element_cls in (
    ("Default", CT_Default),
    ("Override", CT_Override),
    ("Types", CT_Types),
):
    ct_namespace[_tag] = _element_cls

# -- custom classes for the elements of a .rels file --
pr_namespace = element_class_lookup.get_namespace(nsmap["pr"])
for _tag, _element_cls in (
    ("Relationship", CT_Relationship),
    ("Relationships", CT_Relationships),
):
    pr_namespace[_tag] = _element_cls

# -- the loop variables are scaffolding only; don't leave them behind as module attributes --
del _tag, _element_cls
|
||||
219
path/to/venv/lib/python3.12/site-packages/docx/opc/package.py
Normal file
219
path/to/venv/lib/python3.12/site-packages/docx/opc/package.py
Normal file
@@ -0,0 +1,219 @@
|
||||
"""Objects that implement reading and writing OPC packages."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, TYPE_CHECKING, Iterator, cast
|
||||
|
||||
from docx.opc.constants import RELATIONSHIP_TYPE as RT
|
||||
from docx.opc.packuri import PACKAGE_URI, PackURI
|
||||
from docx.opc.part import PartFactory
|
||||
from docx.opc.parts.coreprops import CorePropertiesPart
|
||||
from docx.opc.pkgreader import PackageReader
|
||||
from docx.opc.pkgwriter import PackageWriter
|
||||
from docx.opc.rel import Relationships
|
||||
from docx.shared import lazyproperty
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing_extensions import Self
|
||||
|
||||
from docx.opc.coreprops import CoreProperties
|
||||
from docx.opc.part import Part
|
||||
from docx.opc.rel import _Relationship # pyright: ignore[reportPrivateUsage]
|
||||
|
||||
|
||||
class OpcPackage:
    """Main API class for |python-opc|.

    A new instance is constructed by calling the :meth:`open` class method with a path
    to a package file or file-like object containing one.
    """

    def after_unmarshal(self):
        """Entry point for any post-unmarshaling processing.

        May be overridden by subclasses without forwarding call to super.
        """
        # -- don't place any code here, just catch call if not overridden by subclass --
        pass

    @property
    def core_properties(self) -> CoreProperties:
        """|CoreProperties| object providing read/write access to the Dublin Core
        properties for this document."""
        return self._core_properties_part.core_properties

    def iter_rels(self) -> Iterator[_Relationship]:
        """Generate exactly one reference to each relationship in the package by
        performing a depth-first traversal of the rels graph."""

        def walk_rels(
            source: OpcPackage | Part, visited: list[Part] | None = None
        ) -> Iterator[_Relationship]:
            visited = [] if visited is None else visited
            for rel in source.rels.values():
                # -- every relationship is yielded, including external ones --
                yield rel
                if rel.is_external:
                    continue
                part = rel.target_part
                # -- a part already visited has had its own rels walked; don't repeat them --
                if part in visited:
                    continue
                visited.append(part)
                yield from walk_rels(part, visited)

        yield from walk_rels(self)

    def iter_parts(self) -> Iterator[Part]:
        """Generate exactly one reference to each of the parts in the package by
        performing a depth-first traversal of the rels graph."""

        def walk_parts(
            source: OpcPackage | Part, visited: list[Part] | None = None
        ) -> Iterator[Part]:
            # -- `None` sentinel rather than a mutable `[]` default, matching `walk_rels()`
            # -- in `iter_rels()`; the same list is threaded through the whole recursion.
            visited = [] if visited is None else visited
            for rel in source.rels.values():
                # -- an external relationship has no target part to visit --
                if rel.is_external:
                    continue
                part = rel.target_part
                if part in visited:
                    continue
                visited.append(part)
                yield part
                yield from walk_parts(part, visited)

        yield from walk_parts(self)

    def load_rel(self, reltype: str, target: Part | str, rId: str, is_external: bool = False):
        """Return newly added |_Relationship| instance of `reltype` between this part
        and `target` with key `rId`.

        Target mode is set to ``RTM.EXTERNAL`` if `is_external` is |True|. Intended for
        use during load from a serialized package, where the rId is well known. Other
        methods exist for adding a new relationship to the package during processing.
        """
        return self.rels.add_relationship(reltype, target, rId, is_external)

    @property
    def main_document_part(self):
        """Return a reference to the main document part for this package.

        This is the part this package relates to by ``RT.OFFICE_DOCUMENT``. Examples
        include a document part for a WordprocessingML package, a presentation part for a
        PresentationML package, or a workbook part for a SpreadsheetML package.
        """
        return self.part_related_by(RT.OFFICE_DOCUMENT)

    def next_partname(self, template: str) -> PackURI:
        """Return a |PackURI| instance representing partname matching `template`.

        The returned part-name has the next available numeric suffix to distinguish it
        from other parts of its type. `template` is a printf (%)-style template string
        containing a single replacement item, a '%d' to be used to insert the integer
        portion of the partname. Example: "/word/header%d.xml"
        """
        partnames = {part.partname for part in self.iter_parts()}
        # -- `len(partnames) + 1` candidates are tried but at most `len(partnames)` of them
        # -- can be taken, so one candidate is always free and the loop always returns.
        for n in range(1, len(partnames) + 2):
            candidate_partname = template % n
            if candidate_partname not in partnames:
                return PackURI(candidate_partname)

    @classmethod
    def open(cls, pkg_file: str | IO[bytes]) -> Self:
        """Return an |OpcPackage| instance loaded with the contents of `pkg_file`."""
        pkg_reader = PackageReader.from_file(pkg_file)
        package = cls()
        Unmarshaller.unmarshal(pkg_reader, package, PartFactory)
        return package

    def part_related_by(self, reltype: str) -> Part:
        """Return part to which this package has a relationship of `reltype`.

        Raises |KeyError| if no such relationship is found and |ValueError| if more than
        one such relationship is found.
        """
        return self.rels.part_with_reltype(reltype)

    @property
    def parts(self) -> list[Part]:
        """Return a list containing a reference to each of the parts in this package.

        The list is rebuilt by a fresh traversal (`iter_parts()`) on every access.
        """
        return list(self.iter_parts())

    def relate_to(self, part: Part, reltype: str):
        """Return rId key of new or existing relationship to `part`.

        If a relationship of `reltype` to `part` already exists, its rId is returned.
        Otherwise a new relationship is created and that rId is returned.
        """
        rel = self.rels.get_or_add(reltype, part)
        return rel.rId

    @lazyproperty
    def rels(self):
        """Return a reference to the |Relationships| instance holding the collection of
        relationships for this package."""
        return Relationships(PACKAGE_URI.baseURI)

    def save(self, pkg_file: str | IO[bytes]):
        """Save this package to `pkg_file`.

        `pkg_file` can be either a file-path or a file-like object. `before_marshal()` is
        called on every part before anything is written.
        """
        for part in self.parts:
            part.before_marshal()
        # -- `self.parts` is evaluated again here, after all `before_marshal()` hooks ran --
        PackageWriter.write(pkg_file, self.rels, self.parts)

    @property
    def _core_properties_part(self) -> CorePropertiesPart:
        """|CorePropertiesPart| object related to this package.

        Creates a default core properties part if one is not present (not common).
        """
        try:
            return cast(CorePropertiesPart, self.part_related_by(RT.CORE_PROPERTIES))
        except KeyError:
            # -- no such relationship yet; create the part and relate the package to it --
            core_properties_part = CorePropertiesPart.default(self)
            self.relate_to(core_properties_part, RT.CORE_PROPERTIES)
            return core_properties_part
|
||||
|
||||
|
||||
class Unmarshaller:
    """Namespace for the static methods that unmarshal a package from a |PackageReader|."""

    @staticmethod
    def unmarshal(pkg_reader, package, part_factory):
        """Build the graph of parts and realized relationships described by `pkg_reader`.

        Construction of each part is delegated to `part_factory`. Package-level
        relationships are added to `package`. Once the graph is complete,
        `after_unmarshal()` is called on each part and finally on `package`.
        """
        parts_by_name = Unmarshaller._unmarshal_parts(pkg_reader, package, part_factory)
        Unmarshaller._unmarshal_relationships(pkg_reader, package, parts_by_name)
        for loaded_part in parts_by_name.values():
            loaded_part.after_unmarshal()
        package.after_unmarshal()

    @staticmethod
    def _unmarshal_parts(pkg_reader, package, part_factory):
        """Return a dict of the |Part| instances unmarshalled from `pkg_reader`, keyed by
        partname.

        Each item produced by `pkg_reader.iter_sparts()` is turned into a part by calling
        `part_factory`.
        """
        return {
            partname: part_factory(partname, content_type, reltype, blob, package)
            for partname, content_type, reltype, blob in pkg_reader.iter_sparts()
        }

    @staticmethod
    def _unmarshal_relationships(pkg_reader, package, parts):
        """Load each relationship in `pkg_reader` onto its source object.

        The source is `package` for source URI "/" and otherwise the part in `parts` with
        that URI. The target is the target-ref string for an external relationship and
        otherwise the actual target part looked up in `parts`.
        """
        for source_uri, srel in pkg_reader.iter_srels():
            if source_uri == "/":
                source = package
            else:
                source = parts[source_uri]

            if srel.is_external:
                target = srel.target_ref
            else:
                target = parts[srel.target_partname]

            source.load_rel(srel.reltype, target, srel.rId, srel.is_external)
|
||||
109
path/to/venv/lib/python3.12/site-packages/docx/opc/packuri.py
Normal file
109
path/to/venv/lib/python3.12/site-packages/docx/opc/packuri.py
Normal file
@@ -0,0 +1,109 @@
|
||||
"""Provides the PackURI value type.
|
||||
|
||||
Also some useful known pack URI strings such as PACKAGE_URI.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import posixpath
|
||||
import re
|
||||
|
||||
|
||||
class PackURI(str):
|
||||
"""Provides access to pack URI components such as the baseURI and the filename slice.
|
||||
|
||||
Behaves as |str| otherwise.
|
||||
"""
|
||||
|
||||
_filename_re = re.compile("([a-zA-Z]+)([1-9][0-9]*)?")
|
||||
|
||||
def __new__(cls, pack_uri_str: str):
|
||||
if pack_uri_str[0] != "/":
|
||||
tmpl = "PackURI must begin with slash, got '%s'"
|
||||
raise ValueError(tmpl % pack_uri_str)
|
||||
return str.__new__(cls, pack_uri_str)
|
||||
|
||||
@staticmethod
|
||||
def from_rel_ref(baseURI: str, relative_ref: str) -> PackURI:
|
||||
"""The absolute PackURI formed by translating `relative_ref` onto `baseURI`."""
|
||||
joined_uri = posixpath.join(baseURI, relative_ref)
|
||||
abs_uri = posixpath.abspath(joined_uri)
|
||||
return PackURI(abs_uri)
|
||||
|
||||
@property
|
||||
def baseURI(self) -> str:
|
||||
"""The base URI of this pack URI, the directory portion, roughly speaking.
|
||||
|
||||
E.g. ``'/ppt/slides'`` for ``'/ppt/slides/slide1.xml'``. For the package pseudo-
|
||||
partname '/', baseURI is '/'.
|
||||
"""
|
||||
return posixpath.split(self)[0]
|
||||
|
||||
@property
|
||||
def ext(self) -> str:
|
||||
"""The extension portion of this pack URI, e.g. ``'xml'`` for ``'/word/document.xml'``.
|
||||
|
||||
Note the period is not included.
|
||||
"""
|
||||
# raw_ext is either empty string or starts with period, e.g. '.xml'
|
||||
raw_ext = posixpath.splitext(self)[1]
|
||||
return raw_ext[1:] if raw_ext.startswith(".") else raw_ext
|
||||
|
||||
@property
|
||||
def filename(self):
|
||||
"""The "filename" portion of this pack URI, e.g. ``'slide1.xml'`` for
|
||||
``'/ppt/slides/slide1.xml'``.
|
||||
|
||||
For the package pseudo-partname '/', filename is ''.
|
||||
"""
|
||||
return posixpath.split(self)[1]
|
||||
|
||||
@property
|
||||
def idx(self):
|
||||
"""Return partname index as integer for tuple partname or None for singleton
|
||||
partname, e.g. ``21`` for ``'/ppt/slides/slide21.xml'`` and |None| for
|
||||
``'/ppt/presentation.xml'``."""
|
||||
filename = self.filename
|
||||
if not filename:
|
||||
return None
|
||||
name_part = posixpath.splitext(filename)[0] # filename w/ext removed
|
||||
match = self._filename_re.match(name_part)
|
||||
if match is None:
|
||||
return None
|
||||
if match.group(2):
|
||||
return int(match.group(2))
|
||||
return None
|
||||
|
||||
@property
|
||||
def membername(self):
|
||||
"""The pack URI with the leading slash stripped off, the form used as the Zip
|
||||
file membername for the package item.
|
||||
|
||||
Returns '' for the package pseudo-partname '/'.
|
||||
"""
|
||||
return self[1:]
|
||||
|
||||
def relative_ref(self, baseURI: str):
|
||||
"""Return string containing relative reference to package item from `baseURI`.
|
||||
|
||||
E.g. PackURI('/ppt/slideLayouts/slideLayout1.xml') would return
|
||||
'../slideLayouts/slideLayout1.xml' for baseURI '/ppt/slides'.
|
||||
"""
|
||||
# workaround for posixpath bug in 2.6, doesn't generate correct
|
||||
# relative path when `start` (second) parameter is root ('/')
|
||||
return self[1:] if baseURI == "/" else posixpath.relpath(self, baseURI)
|
||||
|
||||
@property
|
||||
def rels_uri(self):
|
||||
"""The pack URI of the .rels part corresponding to the current pack URI.
|
||||
|
||||
Only produces sensible output if the pack URI is a partname or the package
|
||||
pseudo-partname '/'.
|
||||
"""
|
||||
rels_filename = "%s.rels" % self.filename
|
||||
rels_uri_str = posixpath.join(self.baseURI, "_rels", rels_filename)
|
||||
return PackURI(rels_uri_str)
|
||||
|
||||
|
||||
PACKAGE_URI = PackURI("/")
|
||||
CONTENT_TYPES_URI = PackURI("/[Content_Types].xml")
|
||||
247
path/to/venv/lib/python3.12/site-packages/docx/opc/part.py
Normal file
247
path/to/venv/lib/python3.12/site-packages/docx/opc/part.py
Normal file
@@ -0,0 +1,247 @@
|
||||
# pyright: reportImportCycles=false
|
||||
|
||||
"""Open Packaging Convention (OPC) objects related to package parts."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Callable, Type, cast
|
||||
|
||||
from docx.opc.oxml import serialize_part_xml
|
||||
from docx.opc.packuri import PackURI
|
||||
from docx.opc.rel import Relationships
|
||||
from docx.opc.shared import cls_method_fn
|
||||
from docx.oxml.parser import parse_xml
|
||||
from docx.shared import lazyproperty
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.oxml.xmlchemy import BaseOxmlElement
|
||||
from docx.package import Package
|
||||
|
||||
|
||||
class Part:
    """Base class for package parts.

    Supplies the properties and methods common to all parts; client code is expected to
    subclass it to implement the behaviors of specific part types.
    """

    def __init__(
        self,
        partname: PackURI,
        content_type: str,
        blob: bytes | None = None,
        package: Package | None = None,
    ):
        super().__init__()
        self._partname = partname
        self._content_type = content_type
        self._blob = blob
        self._package = package

    @classmethod
    def load(cls, partname: PackURI, content_type: str, blob: bytes, package: Package):
        """Return an instance of `cls` constructed directly from the arguments."""
        return cls(partname, content_type, blob, package)

    # -- lifecycle hooks ------------------------------------------------------------------

    def after_unmarshal(self):
        """Hook for post-unmarshaling processing, for example to parse the part XML.

        A subclass may override this without forwarding the call to super.
        """
        # -- intentionally empty; only catches the call when a subclass doesn't override --
        pass

    def before_marshal(self):
        """Hook for pre-serialization processing, for example to finalize part naming if
        necessary.

        A subclass may override this without forwarding the call to super.
        """
        # -- intentionally empty; only catches the call when a subclass doesn't override --
        pass

    # -- simple accessors -----------------------------------------------------------------

    @property
    def blob(self) -> bytes:
        """Contents of this package part as a sequence of bytes.

        May be text or binary. Intended to be overridden by subclasses. The default is the
        blob this part was constructed with, or `b""` when that blob is |None| or empty.
        """
        return self._blob or b""

    @property
    def content_type(self):
        """Content type of this part."""
        return self._content_type

    @property
    def package(self):
        """|OpcPackage| instance this part belongs to."""
        return self._package

    @property
    def partname(self):
        """|PackURI| instance holding partname of this part, e.g.
        '/ppt/slides/slide1.xml'."""
        return self._partname

    @partname.setter
    def partname(self, partname: str):
        # -- a plain `str` is rejected; the value must already be a |PackURI| --
        if isinstance(partname, PackURI):
            self._partname = partname
            return
        tmpl = "partname must be instance of PackURI, got '%s'"
        raise TypeError(tmpl % type(partname).__name__)

    # -- relationships --------------------------------------------------------------------

    @lazyproperty
    def rels(self):
        """|Relationships| instance holding the relationships for this part."""
        # -- the legacy `._rels` attribute is retained so `python-docx-template` keeps working --
        self._rels = Relationships(self._partname.baseURI)
        return self._rels

    @property
    def related_parts(self):
        """Dictionary mapping related parts by rId, so child objects can resolve explicit
        relationships present in the part XML, e.g. sldIdLst to a specific |Slide|
        instance."""
        return self.rels.related_parts

    def load_rel(self, reltype: str, target: Part | str, rId: str, is_external: bool = False):
        """Return newly added |_Relationship| instance of `reltype`.

        The new relationship relates the `target` part to this part with key `rId`.

        Target mode is set to ``RTM.EXTERNAL`` if `is_external` is |True|. Intended for use
        during load from a serialized package, where the rId is well-known. Other methods
        exist for adding a new relationship to a part when manipulating a part.
        """
        return self.rels.add_relationship(reltype, target, rId, is_external)

    def relate_to(self, target: Part | str, reltype: str, is_external: bool = False) -> str:
        """Return rId key of relationship of `reltype` to `target`.

        The rId of an existing relationship is returned if there is one, otherwise a new
        relationship is created. `target` is treated as a string for an external
        relationship and as a |Part| otherwise.
        """
        if not is_external:
            return self.rels.get_or_add(reltype, cast(Part, target)).rId
        return self.rels.get_or_add_ext_rel(reltype, cast(str, target))

    def part_related_by(self, reltype: str) -> Part:
        """Return part to which this part has a relationship of `reltype`.

        Raises |KeyError| if no such relationship is found and |ValueError| if more than
        one such relationship is found. Provides ability to resolve implicitly related
        part, such as Slide -> SlideLayout.
        """
        return self.rels.part_with_reltype(reltype)

    def target_ref(self, rId: str) -> str:
        """Return URL contained in target ref of relationship identified by `rId`."""
        return self.rels[rId].target_ref

    def drop_rel(self, rId: str):
        """Remove the relationship identified by `rId` if its reference count is less
        than 2.

        Relationships with a reference count of 0 are implicit relationships.
        """
        if self._rel_ref_count(rId) >= 2:
            return
        del self.rels[rId]

    def _rel_ref_count(self, rId: str) -> int:
        """Return the count of references in this part to the relationship identified by
        `rId`.

        Only an XML part can contain references, so this is 0 for `Part`.
        """
        return 0
|
||||
|
||||
|
||||
class PartFactory:
    """Provides a way for client code to specify a subclass of |Part| to be constructed
    by |Unmarshaller| based on its content type and/or a custom callable.

    Setting ``PartFactory.part_class_selector`` to a callable object will cause that
    object to be called with the parameters ``content_type, reltype``, once for each
    part in the package. If the callable returns an object, it is used as the class for
    that part. If it returns |None|, or no selector is set, part class selection falls
    back to the content type map defined in ``PartFactory.part_type_for``. If no class is
    found there either, the class contained in ``PartFactory.default_part_type`` is used
    to construct the part, which is by default the |Part| class defined in this module.

    "Calling" this class does not produce a `PartFactory` instance; `__new__` returns the
    part loaded by the selected part class.
    """

    # -- optional hook; defaults to `None` ("no selector registered") so the attribute always
    # -- exists and the `is not None` test in `__new__` cannot raise AttributeError
    part_class_selector: Callable[[str, str], Type[Part] | None] | None = None
    # -- maps content-type string to the part class registered for it --
    part_type_for: dict[str, Type[Part]] = {}
    # -- class used when neither the selector nor the map produces one --
    default_part_type = Part

    def __new__(
        cls,
        partname: PackURI,
        content_type: str,
        reltype: str,
        blob: bytes,
        package: Package,
    ):
        """Return the part loaded by the part class selected for `content_type` and
        `reltype`.

        The part is created by calling `.load(partname, content_type, blob, package)` on the
        selected class.
        """
        PartClass: Type[Part] | None = None
        if cls.part_class_selector is not None:
            # -- the selector is fetched via `cls_method_fn()` rather than called as
            # -- `cls.part_class_selector(...)` (helper is defined outside this block;
            # -- presumably it avoids method-binding/typing issues -- TODO confirm)
            part_class_selector = cls_method_fn(cls, "part_class_selector")
            PartClass = part_class_selector(content_type, reltype)
        if PartClass is None:
            PartClass = cls._part_cls_for(content_type)
        return PartClass.load(partname, content_type, blob, package)

    @classmethod
    def _part_cls_for(cls, content_type: str):
        """Return the custom part class registered for `content_type`, or the default
        part class if no custom class is registered for `content_type`."""
        return cls.part_type_for.get(content_type, cls.default_part_type)
|
||||
|
||||
|
||||
class XmlPart(Part):
    """Base class for package parts containing an XML payload, which is most of them.

    Extends |Part| with methods that take care of parsing and reserializing the XML payload
    and managing relationships to other parts.
    """

    def __init__(
        self, partname: PackURI, content_type: str, element: BaseOxmlElement, package: Package
    ):
        # -- no blob is passed to the base class; `.blob` is serialized from the element --
        super().__init__(partname, content_type, package=package)
        self._element = element

    @classmethod
    def load(cls, partname: PackURI, content_type: str, blob: bytes, package: Package):
        """Return an instance of `cls` whose root element is parsed from `blob`."""
        root_element = parse_xml(blob)
        return cls(partname, content_type, root_element, package)

    @property
    def blob(self):
        """The root element of this part serialized by `serialize_part_xml()`."""
        return serialize_part_xml(self._element)

    @property
    def element(self):
        """The root XML element of this XML part."""
        return self._element

    @property
    def part(self):
        """Part of the parent protocol, "children" of the document will not know the part
        that contains them so must ask their parent object.

        That chain of delegation ends here for child objects.
        """
        return self

    def _rel_ref_count(self, rId: str) -> int:
        """Return the count of references in this part's XML to the relationship
        identified by `rId`."""
        referenced_rIds = cast("list[str]", self._element.xpath("//@r:id"))
        return sum(1 for referenced_rId in referenced_rIds if referenced_rId == rId)
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,48 @@
|
||||
"""Core properties part, corresponds to ``/docProps/core.xml`` part in package."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from docx.opc.constants import CONTENT_TYPE as CT
|
||||
from docx.opc.coreprops import CoreProperties
|
||||
from docx.opc.packuri import PackURI
|
||||
from docx.opc.part import XmlPart
|
||||
from docx.oxml.coreprops import CT_CoreProperties
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.opc.package import OpcPackage
|
||||
|
||||
|
||||
class CorePropertiesPart(XmlPart):
    """Corresponds to part named ``/docProps/core.xml``.

    "Core" is short for "Dublin Core": document metadata that is relatively common across
    documents of all types, not just DOCX.
    """

    @classmethod
    def _new(cls, package: OpcPackage) -> CorePropertiesPart:
        """Return a new |CorePropertiesPart| for `package`, wrapping a new
        `CT_CoreProperties` element and named ``/docProps/core.xml``."""
        return CorePropertiesPart(
            PackURI("/docProps/core.xml"),
            CT.OPC_CORE_PROPERTIES,
            CT_CoreProperties.new(),
            package,
        )

    @classmethod
    def default(cls, package: OpcPackage):
        """Return a new |CorePropertiesPart| object initialized with default values for
        its base properties.

        Title, last-modified-by and revision get fixed values; the modified timestamp is
        the current time in UTC.
        """
        new_part = cls._new(package)
        # -- `.core_properties` is read once and the same proxy object is used for all writes --
        props = new_part.core_properties
        props.title = "Word Document"
        props.last_modified_by = "python-docx"
        props.revision = 1
        props.modified = dt.datetime.now(dt.timezone.utc)
        return new_part

    @property
    def core_properties(self):
        """A |CoreProperties| object providing read/write access to the core properties
        contained in this core properties part.

        A new |CoreProperties| object wrapping this part's root element is created on each
        access.
        """
        return CoreProperties(self.element)
|
||||
119
path/to/venv/lib/python3.12/site-packages/docx/opc/phys_pkg.py
Normal file
119
path/to/venv/lib/python3.12/site-packages/docx/opc/phys_pkg.py
Normal file
@@ -0,0 +1,119 @@
|
||||
"""Provides a general interface to a `physical` OPC package, such as a zip file."""
|
||||
|
||||
import os
|
||||
from zipfile import ZIP_DEFLATED, ZipFile, is_zipfile
|
||||
|
||||
from docx.opc.exceptions import PackageNotFoundError
|
||||
from docx.opc.packuri import CONTENT_TYPES_URI
|
||||
|
||||
|
||||
class PhysPkgReader:
    """Factory for physical package reader objects."""

    def __new__(cls, pkg_file):
        """Return a new instance of the reader class suited to `pkg_file`.

        A string is treated as a path: a directory selects the directory reader and a zip
        file selects the zip reader. |PackageNotFoundError| is raised for a path that is
        neither. Anything that is not a string is assumed to be a stream and is left to the
        zip reader to sort out.
        """
        if not isinstance(pkg_file, str):
            reader_cls = _ZipPkgReader
        elif os.path.isdir(pkg_file):
            reader_cls = _DirPkgReader
        elif is_zipfile(pkg_file):
            reader_cls = _ZipPkgReader
        else:
            raise PackageNotFoundError("Package not found at '%s'" % pkg_file)

        return super().__new__(reader_cls)
|
||||
|
||||
|
||||
class PhysPkgWriter:
    """Factory for physical package writer objects."""

    def __new__(cls, pkg_file):
        """Return a new instance of the zip writer class, whatever `pkg_file` is."""
        writer_cls = _ZipPkgWriter
        return super().__new__(writer_cls)
|
||||
|
||||
|
||||
class _DirPkgReader(PhysPkgReader):
    """Implements |PhysPkgReader| interface for an OPC package extracted into a
    directory."""

    def __init__(self, path):
        """`path` is the path to a directory containing an expanded package.

        It is stored as an absolute path.
        """
        super().__init__()
        self._path = os.path.abspath(path)

    @property
    def content_types_xml(self):
        """Return the `[Content_Types].xml` blob from the package."""
        return self.blob_for(CONTENT_TYPES_URI)

    def blob_for(self, pack_uri):
        """Return contents of file corresponding to `pack_uri` in package directory.

        The file is located by joining the package directory with the membername of
        `pack_uri` and is read in binary mode.
        """
        member_path = os.path.join(self._path, pack_uri.membername)
        with open(member_path, "rb") as member_file:
            return member_file.read()

    def rels_xml_for(self, source_uri):
        """Return rels item XML for source with `source_uri`, or None if the item has no
        rels item."""
        try:
            return self.blob_for(source_uri.rels_uri)
        except IOError:
            # -- the rels file could not be read; treated as "no rels item" --
            return None

    def close(self):
        """Does nothing; a directory doesn't need closing.

        Present only for interface consistency with the zip-based reader.
        """
        pass
|
||||
|
||||
|
||||
class _ZipPkgReader(PhysPkgReader):
    """Implements |PhysPkgReader| interface for a zip file OPC package."""

    def __init__(self, pkg_file):
        """Open `pkg_file` as a zip archive for reading."""
        super().__init__()
        self._zipf = ZipFile(pkg_file, "r")

    @property
    def content_types_xml(self):
        """Return the `[Content_Types].xml` blob from the zip package."""
        return self.blob_for(CONTENT_TYPES_URI)

    def blob_for(self, pack_uri):
        """Return blob corresponding to `pack_uri`.

        The zip member read is the one named by the membername of `pack_uri`. A missing
        member surfaces as the |KeyError| that `rels_xml_for()` catches.
        """
        member_name = pack_uri.membername
        return self._zipf.read(member_name)

    def rels_xml_for(self, source_uri):
        """Return rels item XML for source with `source_uri` or None if no rels item is
        present."""
        try:
            return self.blob_for(source_uri.rels_uri)
        except KeyError:
            # -- no such member in the archive; the source simply has no rels item --
            return None

    def close(self):
        """Close the zip archive, releasing any resources it is using."""
        self._zipf.close()
|
||||
|
||||
|
||||
class _ZipPkgWriter(PhysPkgWriter):
    """|PhysPkgWriter| implementation that stores the OPC package as a zip archive."""

    def __init__(self, pkg_file):
        """Open `pkg_file` for writing as a deflate-compressed zip archive."""
        super().__init__()
        self._zipf = ZipFile(pkg_file, "w", compression=ZIP_DEFLATED)

    def close(self):
        """Close the underlying zip archive.

        Closing flushes any pending physical writes and releases the resources the
        archive holds.
        """
        self._zipf.close()

    def write(self, pack_uri, blob):
        """Store `blob` in the archive under the member name derived from `pack_uri`."""
        membername = pack_uri.membername
        self._zipf.writestr(membername, blob)
|
||||
254
path/to/venv/lib/python3.12/site-packages/docx/opc/pkgreader.py
Normal file
254
path/to/venv/lib/python3.12/site-packages/docx/opc/pkgreader.py
Normal file
@@ -0,0 +1,254 @@
|
||||
"""Low-level, read-only API to a serialized Open Packaging Convention (OPC) package."""
|
||||
|
||||
from docx.opc.constants import RELATIONSHIP_TARGET_MODE as RTM
|
||||
from docx.opc.oxml import parse_xml
|
||||
from docx.opc.packuri import PACKAGE_URI, PackURI
|
||||
from docx.opc.phys_pkg import PhysPkgReader
|
||||
from docx.opc.shared import CaseInsensitiveDict
|
||||
|
||||
|
||||
class PackageReader:
    """Read-only access to the parts and relationships of a serialized OPC package.

    Contents are loaded eagerly by :meth:`from_file` and are then available through
    :meth:`iter_sparts` and :meth:`iter_srels`.
    """

    def __init__(self, content_types, pkg_srels, sparts):
        # -- `content_types` is accepted but not retained; each serialized part in
        # -- `sparts` is constructed with its own content type already resolved
        super().__init__()
        self._pkg_srels = pkg_srels
        self._sparts = sparts

    @staticmethod
    def from_file(pkg_file):
        """Return a |PackageReader| instance loaded with contents of `pkg_file`.

        The physical reader is closed even when loading fails part-way, so a corrupt
        or incomplete package does not leave the underlying archive open.
        """
        phys_reader = PhysPkgReader(pkg_file)
        try:
            content_types = _ContentTypeMap.from_xml(phys_reader.content_types_xml)
            pkg_srels = PackageReader._srels_for(phys_reader, PACKAGE_URI)
            sparts = PackageReader._load_serialized_parts(phys_reader, pkg_srels, content_types)
        finally:
            phys_reader.close()
        return PackageReader(content_types, pkg_srels, sparts)

    def iter_sparts(self):
        """Generate a 4-tuple `(partname, content_type, reltype, blob)` for each of the
        serialized parts in the package."""
        for s in self._sparts:
            yield (s.partname, s.content_type, s.reltype, s.blob)

    def iter_srels(self):
        """Generate a 2-tuple `(source_uri, srel)` for each of the relationships in the
        package.

        Package-level relationships come first, with |PACKAGE_URI| as their source,
        followed by the relationships of each part in turn.
        """
        for srel in self._pkg_srels:
            yield (PACKAGE_URI, srel)
        for spart in self._sparts:
            for srel in spart.srels:
                yield (spart.partname, srel)

    @staticmethod
    def _load_serialized_parts(phys_reader, pkg_srels, content_types):
        """Return a tuple of |_SerializedPart| instances corresponding to the parts in
        `phys_reader` accessible by walking the relationship graph starting with
        `pkg_srels`.

        `content_types` is indexed by partname to resolve each part's content type.
        """
        part_walker = PackageReader._walk_phys_parts(phys_reader, pkg_srels)
        return tuple(
            _SerializedPart(partname, content_types[partname], reltype, blob, srels)
            for partname, blob, reltype, srels in part_walker
        )

    @staticmethod
    def _srels_for(phys_reader, source_uri):
        """Return |_SerializedRelationships| instance populated with relationships for
        source identified by `source_uri`."""
        rels_xml = phys_reader.rels_xml_for(source_uri)
        return _SerializedRelationships.load_from_xml(source_uri.baseURI, rels_xml)

    @staticmethod
    def _walk_phys_parts(phys_reader, srels, visited_partnames=None):
        """Generate a 4-tuple `(partname, blob, reltype, srels)` for each of the parts
        in `phys_reader` by walking the relationship graph rooted at `srels`.

        The walk is depth-first. External relationships are skipped and each partname
        is yielded at most once; `visited_partnames` is the list shared across the
        recursion that records which partnames have already been seen.
        """
        if visited_partnames is None:
            visited_partnames = []
        for srel in srels:
            if srel.is_external:
                continue
            partname = srel.target_partname
            if partname in visited_partnames:
                continue
            # -- mark before descending so relationship cycles terminate --
            visited_partnames.append(partname)
            part_srels = PackageReader._srels_for(phys_reader, partname)
            blob = phys_reader.blob_for(partname)
            yield (partname, blob, srel.reltype, part_srels)
            yield from PackageReader._walk_phys_parts(phys_reader, part_srels, visited_partnames)
|
||||
|
||||
|
||||
class _ContentTypeMap:
|
||||
"""Value type providing dictionary semantics for looking up content type by part
|
||||
name, e.g. ``content_type = cti['/ppt/presentation.xml']``."""
|
||||
|
||||
def __init__(self):
|
||||
super(_ContentTypeMap, self).__init__()
|
||||
self._overrides = CaseInsensitiveDict()
|
||||
self._defaults = CaseInsensitiveDict()
|
||||
|
||||
def __getitem__(self, partname):
|
||||
"""Return content type for part identified by `partname`."""
|
||||
if not isinstance(partname, PackURI):
|
||||
tmpl = "_ContentTypeMap key must be <type 'PackURI'>, got %s"
|
||||
raise KeyError(tmpl % type(partname))
|
||||
if partname in self._overrides:
|
||||
return self._overrides[partname]
|
||||
if partname.ext in self._defaults:
|
||||
return self._defaults[partname.ext]
|
||||
tmpl = "no content type for partname '%s' in [Content_Types].xml"
|
||||
raise KeyError(tmpl % partname)
|
||||
|
||||
@staticmethod
|
||||
def from_xml(content_types_xml):
|
||||
"""Return a new |_ContentTypeMap| instance populated with the contents of
|
||||
`content_types_xml`."""
|
||||
types_elm = parse_xml(content_types_xml)
|
||||
ct_map = _ContentTypeMap()
|
||||
for o in types_elm.overrides:
|
||||
ct_map._add_override(o.partname, o.content_type)
|
||||
for d in types_elm.defaults:
|
||||
ct_map._add_default(d.extension, d.content_type)
|
||||
return ct_map
|
||||
|
||||
def _add_default(self, extension, content_type):
|
||||
"""Add the default mapping of `extension` to `content_type` to this content type
|
||||
mapping."""
|
||||
self._defaults[extension] = content_type
|
||||
|
||||
def _add_override(self, partname, content_type):
|
||||
"""Add the default mapping of `partname` to `content_type` to this content type
|
||||
mapping."""
|
||||
self._overrides[partname] = content_type
|
||||
|
||||
|
||||
class _SerializedPart:
|
||||
"""Value object for an OPC package part.
|
||||
|
||||
Provides access to the partname, content type, blob, and serialized relationships
|
||||
for the part.
|
||||
"""
|
||||
|
||||
def __init__(self, partname, content_type, reltype, blob, srels):
|
||||
super(_SerializedPart, self).__init__()
|
||||
self._partname = partname
|
||||
self._content_type = content_type
|
||||
self._reltype = reltype
|
||||
self._blob = blob
|
||||
self._srels = srels
|
||||
|
||||
@property
|
||||
def partname(self):
|
||||
return self._partname
|
||||
|
||||
@property
|
||||
def content_type(self):
|
||||
return self._content_type
|
||||
|
||||
@property
|
||||
def blob(self):
|
||||
return self._blob
|
||||
|
||||
@property
|
||||
def reltype(self):
|
||||
"""The referring relationship type of this part."""
|
||||
return self._reltype
|
||||
|
||||
@property
|
||||
def srels(self):
|
||||
return self._srels
|
||||
|
||||
|
||||
class _SerializedRelationship:
|
||||
"""Value object representing a serialized relationship in an OPC package.
|
||||
|
||||
Serialized, in this case, means any target part is referred to via its partname
|
||||
rather than a direct link to an in-memory |Part| object.
|
||||
"""
|
||||
|
||||
def __init__(self, baseURI, rel_elm):
|
||||
super(_SerializedRelationship, self).__init__()
|
||||
self._baseURI = baseURI
|
||||
self._rId = rel_elm.rId
|
||||
self._reltype = rel_elm.reltype
|
||||
self._target_mode = rel_elm.target_mode
|
||||
self._target_ref = rel_elm.target_ref
|
||||
|
||||
@property
|
||||
def is_external(self):
|
||||
"""True if target_mode is ``RTM.EXTERNAL``"""
|
||||
return self._target_mode == RTM.EXTERNAL
|
||||
|
||||
@property
|
||||
def reltype(self):
|
||||
"""Relationship type, like ``RT.OFFICE_DOCUMENT``"""
|
||||
return self._reltype
|
||||
|
||||
@property
|
||||
def rId(self):
|
||||
"""Relationship id, like 'rId9', corresponds to the ``Id`` attribute on the
|
||||
``CT_Relationship`` element."""
|
||||
return self._rId
|
||||
|
||||
@property
|
||||
def target_mode(self):
|
||||
"""String in ``TargetMode`` attribute of ``CT_Relationship`` element, one of
|
||||
``RTM.INTERNAL`` or ``RTM.EXTERNAL``."""
|
||||
return self._target_mode
|
||||
|
||||
@property
|
||||
def target_ref(self):
|
||||
"""String in ``Target`` attribute of ``CT_Relationship`` element, a relative
|
||||
part reference for internal target mode or an arbitrary URI, e.g. an HTTP URL,
|
||||
for external target mode."""
|
||||
return self._target_ref
|
||||
|
||||
@property
|
||||
def target_partname(self):
|
||||
"""|PackURI| instance containing partname targeted by this relationship.
|
||||
|
||||
Raises ``ValueError`` on reference if target_mode is ``'External'``. Use
|
||||
:attr:`target_mode` to check before referencing.
|
||||
"""
|
||||
if self.is_external:
|
||||
msg = (
|
||||
"target_partname attribute on Relationship is undefined w"
|
||||
'here TargetMode == "External"'
|
||||
)
|
||||
raise ValueError(msg)
|
||||
# lazy-load _target_partname attribute
|
||||
if not hasattr(self, "_target_partname"):
|
||||
self._target_partname = PackURI.from_rel_ref(self._baseURI, self.target_ref)
|
||||
return self._target_partname
|
||||
|
||||
|
||||
class _SerializedRelationships:
|
||||
"""Read-only sequence of |_SerializedRelationship| instances corresponding to the
|
||||
relationships item XML passed to constructor."""
|
||||
|
||||
def __init__(self):
|
||||
super(_SerializedRelationships, self).__init__()
|
||||
self._srels = []
|
||||
|
||||
def __iter__(self):
|
||||
"""Support iteration, e.g. 'for x in srels:'."""
|
||||
return self._srels.__iter__()
|
||||
|
||||
@staticmethod
|
||||
def load_from_xml(baseURI, rels_item_xml):
|
||||
"""Return |_SerializedRelationships| instance loaded with the relationships
|
||||
contained in `rels_item_xml`.
|
||||
|
||||
Returns an empty collection if `rels_item_xml` is |None|.
|
||||
"""
|
||||
srels = _SerializedRelationships()
|
||||
if rels_item_xml is not None:
|
||||
rels_elm = parse_xml(rels_item_xml)
|
||||
for rel_elm in rels_elm.Relationship_lst:
|
||||
srels._srels.append(_SerializedRelationship(baseURI, rel_elm))
|
||||
return srels
|
||||
115
path/to/venv/lib/python3.12/site-packages/docx/opc/pkgwriter.py
Normal file
115
path/to/venv/lib/python3.12/site-packages/docx/opc/pkgwriter.py
Normal file
@@ -0,0 +1,115 @@
|
||||
"""Provides low-level, write-only API to serialized (OPC) package.
|
||||
|
||||
OPC stands for Open Packaging Convention. This is, essentially, an implementation of
|
||||
OpcPackage.save().
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Iterable
|
||||
|
||||
from docx.opc.constants import CONTENT_TYPE as CT
|
||||
from docx.opc.oxml import CT_Types, serialize_part_xml
|
||||
from docx.opc.packuri import CONTENT_TYPES_URI, PACKAGE_URI
|
||||
from docx.opc.phys_pkg import PhysPkgWriter
|
||||
from docx.opc.shared import CaseInsensitiveDict
|
||||
from docx.opc.spec import default_content_types
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.opc.part import Part
|
||||
|
||||
|
||||
class PackageWriter:
    """Writes a zip-format OPC package to `pkg_file`, where `pkg_file` can be either a
    path to a zip file (a string) or a file-like object.

    Its single API method, :meth:`write`, is static, so this class is not intended to be
    instantiated.
    """

    @staticmethod
    def write(pkg_file, pkg_rels, parts):
        """Write a physical OPC package to `pkg_file` containing `pkg_rels` and `parts`
        and a content types stream based on the content types of the parts.

        `parts` is iterated twice (once for the content-types stream and once for the
        part blobs), so it must be re-iterable, e.g. a list rather than a generator.
        The physical writer is closed even if one of the write steps raises, so the
        underlying archive handle is never left open.
        """
        phys_writer = PhysPkgWriter(pkg_file)
        try:
            PackageWriter._write_content_types_stream(phys_writer, parts)
            PackageWriter._write_pkg_rels(phys_writer, pkg_rels)
            PackageWriter._write_parts(phys_writer, parts)
        finally:
            phys_writer.close()

    @staticmethod
    def _write_content_types_stream(phys_writer, parts):
        """Write ``[Content_Types].xml`` part to the physical package with an
        appropriate content type lookup target for each part in `parts`."""
        cti = _ContentTypesItem.from_parts(parts)
        phys_writer.write(CONTENT_TYPES_URI, cti.blob)

    @staticmethod
    def _write_parts(phys_writer: PhysPkgWriter, parts: Iterable[Part]):
        """Write the blob of each part in `parts` to the package, along with a rels item
        for its relationships if and only if it has any."""
        for part in parts:
            phys_writer.write(part.partname, part.blob)
            # -- a part without relationships gets no rels item at all --
            if len(part.rels):
                phys_writer.write(part.partname.rels_uri, part.rels.xml)

    @staticmethod
    def _write_pkg_rels(phys_writer, pkg_rels):
        """Write the XML rels item for `pkg_rels` ('/_rels/.rels') to the package."""
        phys_writer.write(PACKAGE_URI.rels_uri, pkg_rels.xml)
|
||||
|
||||
|
||||
class _ContentTypesItem:
    """Composes the content types item (``[Content_Types].xml``) for a set of parts.

    Build one with :meth:`from_parts` and read the serialized result from
    :attr:`blob`, e.g. ``_ContentTypesItem.from_parts(parts).blob``.
    """

    def __init__(self):
        self._defaults = CaseInsensitiveDict()
        self._overrides = {}

    @property
    def blob(self):
        """Serialized XML of this content types item, ready to be stored as
        ``[Content_Types].xml`` in an OPC package."""
        return serialize_part_xml(self._element)

    @classmethod
    def from_parts(cls, parts):
        """Return a new instance holding a content-type entry for each part in `parts`.

        Defaults for the "rels" and "xml" extensions are always present, whatever
        `parts` contains.
        """
        cti = cls()
        cti._defaults["rels"] = CT.OPC_RELATIONSHIPS
        cti._defaults["xml"] = CT.XML
        for part in parts:
            cti._add_content_type(part.partname, part.content_type)
        return cti

    def _add_content_type(self, partname, content_type):
        """Register `content_type` for the part named `partname`.

        A Default entry keyed by extension is used when the (extension, content type)
        pair appears in `default_content_types`; otherwise an Override entry keyed by
        the partname is used.
        """
        ext = partname.ext
        if (ext.lower(), content_type) in default_content_types:
            self._defaults[ext] = content_type
            return
        self._overrides[partname] = content_type

    @property
    def _element(self):
        """Content types element populated from the recorded defaults and overrides.

        Element order is not strictly significant, but to aid testing and readability
        Default elements are emitted sorted by extension and Override elements sorted
        by partname.
        """
        types_elm = CT_Types.new()
        for ext in sorted(self._defaults):
            types_elm.add_default(ext, self._defaults[ext])
        for partname in sorted(self._overrides):
            types_elm.add_override(partname, self._overrides[partname])
        return types_elm
|
||||
153
path/to/venv/lib/python3.12/site-packages/docx/opc/rel.py
Normal file
153
path/to/venv/lib/python3.12/site-packages/docx/opc/rel.py
Normal file
@@ -0,0 +1,153 @@
|
||||
"""Relationship-related objects."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Dict, cast
|
||||
|
||||
from docx.opc.oxml import CT_Relationships
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docx.opc.part import Part
|
||||
|
||||
|
||||
class Relationships(Dict[str, "_Relationship"]):
    """Collection of |_Relationship| instances keyed by rId."""

    def __init__(self, baseURI: str):
        super().__init__()
        self._baseURI = baseURI
        # -- internal targets only, keyed by rId; see `related_parts` --
        self._target_parts_by_rId: dict[str, Any] = {}

    def add_relationship(
        self, reltype: str, target: Part | str, rId: str, is_external: bool = False
    ) -> "_Relationship":
        """Add a new |_Relationship| under `rId` and return it.

        When the relationship is internal, `target` is also recorded in the mapping
        exposed by :attr:`related_parts`.
        """
        new_rel = _Relationship(rId, reltype, target, self._baseURI, is_external)
        self[rId] = new_rel
        if not is_external:
            self._target_parts_by_rId[rId] = target
        return new_rel

    def get_or_add(self, reltype: str, target_part: Part) -> _Relationship:
        """Return the relationship of `reltype` to `target_part`, adding it under the
        next available rId when no such relationship exists yet."""
        existing = self._get_matching(reltype, target_part)
        if existing is not None:
            return existing
        return self.add_relationship(reltype, target_part, self._next_rId)

    def get_or_add_ext_rel(self, reltype: str, target_ref: str) -> str:
        """Return the rId of the external relationship of `reltype` to `target_ref`,
        adding that relationship first when it is not already present."""
        existing = self._get_matching(reltype, target_ref, is_external=True)
        if existing is not None:
            return existing.rId
        return self.add_relationship(reltype, target_ref, self._next_rId, is_external=True).rId

    def part_with_reltype(self, reltype: str) -> Part:
        """Return the target part of the single relationship having `reltype`.

        Raises |KeyError| when there is no such relationship and |ValueError| when
        more than one relationship has that type.
        """
        return self._get_rel_of_type(reltype).target_part

    @property
    def related_parts(self):
        """Dict mapping rId to target part for every internal relationship added to
        this collection."""
        return self._target_parts_by_rId

    @property
    def xml(self) -> str:
        """XML serialization of this collection, suitable for storage as a .rels file
        in an OPC package."""
        rels_elm = CT_Relationships.new()
        for item in self.values():
            rels_elm.add_rel(item.rId, item.reltype, item.target_ref, item.is_external)
        return rels_elm.xml

    def _get_matching(
        self, reltype: str, target: Part | str, is_external: bool = False
    ) -> _Relationship | None:
        """Return the first relationship agreeing on `reltype`, `target`, and
        `is_external`, or |None| when the collection holds no such relationship."""
        for candidate in self.values():
            if candidate.reltype != reltype or candidate.is_external != is_external:
                continue
            # -- external rels compare by target reference, internal ones by part --
            if candidate.is_external:
                candidate_target = candidate.target_ref
            else:
                candidate_target = candidate.target_part
            if candidate_target == target:
                return candidate
        return None

    def _get_rel_of_type(self, reltype: str):
        """Return the one relationship in the collection whose type is `reltype`.

        Raises |KeyError| when none matches and |ValueError| when several do.
        """
        found = [item for item in self.values() if item.reltype == reltype]
        if not found:
            raise KeyError("no relationship of type '%s' in collection" % reltype)
        if len(found) > 1:
            raise ValueError("multiple relationships of type '%s' in collection" % reltype)
        return found[0]

    @property
    def _next_rId(self) -> str:
        """Lowest-numbered unused rId, counting up from 'rId1'.

        Gaps in the numbering are reused, e.g. 'rId2' for rIds ['rId1', 'rId3'].
        """
        n = 1
        while "rId%d" % n in self:
            n += 1
        return "rId%d" % n  # like 'rId19'
|
||||
|
||||
|
||||
class _Relationship:
|
||||
"""Value object for relationship to part."""
|
||||
|
||||
def __init__(
|
||||
self, rId: str, reltype: str, target: Part | str, baseURI: str, external: bool = False
|
||||
):
|
||||
super(_Relationship, self).__init__()
|
||||
self._rId = rId
|
||||
self._reltype = reltype
|
||||
self._target = target
|
||||
self._baseURI = baseURI
|
||||
self._is_external = bool(external)
|
||||
|
||||
@property
|
||||
def is_external(self) -> bool:
|
||||
return self._is_external
|
||||
|
||||
@property
|
||||
def reltype(self) -> str:
|
||||
return self._reltype
|
||||
|
||||
@property
|
||||
def rId(self) -> str:
|
||||
return self._rId
|
||||
|
||||
@property
|
||||
def target_part(self) -> Part:
|
||||
if self._is_external:
|
||||
raise ValueError(
|
||||
"target_part property on _Relationship is undefined when target mode is External"
|
||||
)
|
||||
return cast("Part", self._target)
|
||||
|
||||
@property
|
||||
def target_ref(self) -> str:
|
||||
if self._is_external:
|
||||
return cast(str, self._target)
|
||||
else:
|
||||
target = cast("Part", self._target)
|
||||
return target.partname.relative_ref(self._baseURI)
|
||||
31
path/to/venv/lib/python3.12/site-packages/docx/opc/shared.py
Normal file
31
path/to/venv/lib/python3.12/site-packages/docx/opc/shared.py
Normal file
@@ -0,0 +1,31 @@
|
||||
"""Objects shared by opc modules."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, TypeVar
|
||||
|
||||
_T = TypeVar("_T")
|
||||
|
||||
|
||||
class CaseInsensitiveDict(Dict[str, Any]):
    """Mapping type that behaves like dict except that it matches without respect to the
    case of the key.

    E.g. cid['A'] == cid['a']. Keys are stored lower-cased, so iteration yields the
    lower-case form. Item access, membership, deletion, `get()`, `pop()` and
    `setdefault()` are all case-insensitive.

    Note this is not general-purpose, just complete enough to satisfy opc package
    needs. It assumes str keys, and that it is created empty; keys passed to the
    constructor or to `update()` are stored as given and are not accounted for.
    """

    def __contains__(self, key):
        return super().__contains__(key.lower())

    def __delitem__(self, key):
        super().__delitem__(key.lower())

    def __getitem__(self, key):
        return super().__getitem__(key.lower())

    def __setitem__(self, key, value):
        return super().__setitem__(key.lower(), value)

    # -- the built-in dict methods below do not route through the dunder methods
    # -- above, so each needs its own override to stay case-insensitive

    def get(self, key, default=None):
        """Return the value for `key` ignoring case, or `default` when absent."""
        return super().get(key.lower(), default)

    def pop(self, key, *args):
        """Remove `key` ignoring case and return its value.

        An optional second positional argument is returned when `key` is absent;
        without it a missing key raises |KeyError|, as with `dict.pop()`.
        """
        return super().pop(key.lower(), *args)

    def setdefault(self, key, default=None):
        """Return the value for `key` ignoring case, storing `default` first when
        `key` is absent."""
        return super().setdefault(key.lower(), default)
|
||||
|
||||
|
||||
def cls_method_fn(cls: type, method_name: str):
    """Look up the attribute named `method_name` on `cls` and return it.

    Raises |AttributeError| when `cls` has no attribute of that name.
    """
    method = getattr(cls, method_name)
    return method
|
||||
24
path/to/venv/lib/python3.12/site-packages/docx/opc/spec.py
Normal file
24
path/to/venv/lib/python3.12/site-packages/docx/opc/spec.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""Provides mappings that embody aspects of the Open XML spec ISO/IEC 29500."""
|
||||
|
||||
from docx.opc.constants import CONTENT_TYPE as CT
|
||||
|
||||
# -- (extension, content-type) pairs. The package writer's content-types item tests
# -- `(ext.lower(), content_type)` for membership here to decide whether a part can be
# -- described by a Default element rather than an Override, so extensions are listed
# -- in lower case. An extension may appear more than once with different content types.
default_content_types = (
    ("bin", CT.PML_PRINTER_SETTINGS),
    ("bin", CT.SML_PRINTER_SETTINGS),
    ("bin", CT.WML_PRINTER_SETTINGS),
    ("bmp", CT.BMP),
    ("emf", CT.X_EMF),
    ("fntdata", CT.X_FONTDATA),
    ("gif", CT.GIF),
    ("jpe", CT.JPEG),
    ("jpeg", CT.JPEG),
    ("jpg", CT.JPEG),
    ("png", CT.PNG),
    ("rels", CT.OPC_RELATIONSHIPS),
    ("tif", CT.TIFF),
    ("tiff", CT.TIFF),
    ("wdp", CT.MS_PHOTO),
    ("wmf", CT.X_WMF),
    ("xlsx", CT.SML_SHEET),
    ("xml", CT.XML),
)
|
||||
251
path/to/venv/lib/python3.12/site-packages/docx/oxml/__init__.py
Normal file
251
path/to/venv/lib/python3.12/site-packages/docx/oxml/__init__.py
Normal file
@@ -0,0 +1,251 @@
|
||||
# ruff: noqa: E402, I001
|
||||
|
||||
"""Initializes oxml sub-package.
|
||||
|
||||
This includes registering custom element classes corresponding to Open XML elements.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from docx.oxml.drawing import CT_Drawing
|
||||
from docx.oxml.parser import OxmlElement, parse_xml, register_element_cls
|
||||
from docx.oxml.shape import (
|
||||
CT_Anchor,
|
||||
CT_Blip,
|
||||
CT_BlipFillProperties,
|
||||
CT_GraphicalObject,
|
||||
CT_GraphicalObjectData,
|
||||
CT_Inline,
|
||||
CT_NonVisualDrawingProps,
|
||||
CT_Picture,
|
||||
CT_PictureNonVisual,
|
||||
CT_Point2D,
|
||||
CT_PositiveSize2D,
|
||||
CT_ShapeProperties,
|
||||
CT_Transform2D,
|
||||
)
|
||||
from docx.oxml.shared import CT_DecimalNumber, CT_OnOff, CT_String
|
||||
from docx.oxml.text.hyperlink import CT_Hyperlink
|
||||
from docx.oxml.text.pagebreak import CT_LastRenderedPageBreak
|
||||
from docx.oxml.text.run import (
|
||||
CT_R,
|
||||
CT_Br,
|
||||
CT_Cr,
|
||||
CT_NoBreakHyphen,
|
||||
CT_PTab,
|
||||
CT_Text,
|
||||
)
|
||||
|
||||
# -- `OxmlElement` and `parse_xml()` are not used in this module but several downstream
|
||||
# -- "extension" packages expect to find them here and there's no compelling reason
|
||||
# -- not to republish them here so those keep working.
|
||||
__all__ = ["OxmlElement", "parse_xml"]
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# DrawingML-related elements
|
||||
|
||||
# -- each pair maps a namespace-prefixed tag to its custom element class; the pairs
# -- are registered in the order listed
for _nsptag, _element_cls in (
    ("a:blip", CT_Blip),
    ("a:ext", CT_PositiveSize2D),
    ("a:graphic", CT_GraphicalObject),
    ("a:graphicData", CT_GraphicalObjectData),
    ("a:off", CT_Point2D),
    ("a:xfrm", CT_Transform2D),
    ("pic:blipFill", CT_BlipFillProperties),
    ("pic:cNvPr", CT_NonVisualDrawingProps),
    ("pic:nvPicPr", CT_PictureNonVisual),
    ("pic:pic", CT_Picture),
    ("pic:spPr", CT_ShapeProperties),
    ("w:drawing", CT_Drawing),
    ("wp:anchor", CT_Anchor),
    ("wp:docPr", CT_NonVisualDrawingProps),
    ("wp:extent", CT_PositiveSize2D),
    ("wp:inline", CT_Inline),
):
    register_element_cls(_nsptag, _element_cls)
del _nsptag, _element_cls  # -- keep the loop variables out of the module namespace
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# hyperlink-related elements
|
||||
|
||||
# -- each pair maps a namespace-prefixed tag to its custom element class; the pairs
# -- are registered in the order listed
for _nsptag, _element_cls in (
    # -- hyperlink-related elements --
    ("w:hyperlink", CT_Hyperlink),
    # -- text-related elements --
    ("w:br", CT_Br),
    ("w:cr", CT_Cr),
    ("w:lastRenderedPageBreak", CT_LastRenderedPageBreak),
    ("w:noBreakHyphen", CT_NoBreakHyphen),
    ("w:ptab", CT_PTab),
    ("w:r", CT_R),
    ("w:t", CT_Text),
    # -- header/footer-related mappings --
    ("w:evenAndOddHeaders", CT_OnOff),
    ("w:titlePg", CT_OnOff),
):
    register_element_cls(_nsptag, _element_cls)
del _nsptag, _element_cls  # -- keep the loop variables out of the module namespace
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# other custom element class mappings
|
||||
|
||||
from .comments import CT_Comments, CT_Comment
|
||||
|
||||
# -- comments-part elements, registered in the order listed --
for _nsptag, _element_cls in (
    ("w:comments", CT_Comments),
    ("w:comment", CT_Comment),
):
    register_element_cls(_nsptag, _element_cls)
del _nsptag, _element_cls  # -- keep the loop variables out of the module namespace
|
||||
|
||||
from .coreprops import CT_CoreProperties
|
||||
|
||||
# -- map the `cp:coreProperties` tag to its custom element class --
register_element_cls("cp:coreProperties", CT_CoreProperties)
|
||||
|
||||
from .document import CT_Body, CT_Document
|
||||
|
||||
# -- document-part elements, registered in the order listed --
for _nsptag, _element_cls in (
    ("w:body", CT_Body),
    ("w:document", CT_Document),
):
    register_element_cls(_nsptag, _element_cls)
del _nsptag, _element_cls  # -- keep the loop variables out of the module namespace
|
||||
|
||||
from .numbering import CT_Num, CT_Numbering, CT_NumLvl, CT_NumPr
|
||||
|
||||
# -- numbering-related elements, registered in the order listed --
for _nsptag, _element_cls in (
    ("w:abstractNumId", CT_DecimalNumber),
    ("w:ilvl", CT_DecimalNumber),
    ("w:lvlOverride", CT_NumLvl),
    ("w:num", CT_Num),
    ("w:numId", CT_DecimalNumber),
    ("w:numPr", CT_NumPr),
    ("w:numbering", CT_Numbering),
    ("w:startOverride", CT_DecimalNumber),
):
    register_element_cls(_nsptag, _element_cls)
del _nsptag, _element_cls  # -- keep the loop variables out of the module namespace
|
||||
|
||||
from .section import (
|
||||
CT_HdrFtr,
|
||||
CT_HdrFtrRef,
|
||||
CT_PageMar,
|
||||
CT_PageSz,
|
||||
CT_SectPr,
|
||||
CT_SectType,
|
||||
)
|
||||
|
||||
# -- section-related elements, registered in the order listed --
for _nsptag, _element_cls in (
    ("w:footerReference", CT_HdrFtrRef),
    ("w:ftr", CT_HdrFtr),
    ("w:hdr", CT_HdrFtr),
    ("w:headerReference", CT_HdrFtrRef),
    ("w:pgMar", CT_PageMar),
    ("w:pgSz", CT_PageSz),
    ("w:sectPr", CT_SectPr),
    ("w:type", CT_SectType),
):
    register_element_cls(_nsptag, _element_cls)
del _nsptag, _element_cls  # -- keep the loop variables out of the module namespace
|
||||
|
||||
from .settings import CT_Settings
|
||||
|
||||
# -- map the `w:settings` tag to its custom element class --
register_element_cls("w:settings", CT_Settings)
|
||||
|
||||
from .styles import CT_LatentStyles, CT_LsdException, CT_Style, CT_Styles
|
||||
|
||||
# -- style-related elements, registered in the order listed --
for _nsptag, _element_cls in (
    ("w:basedOn", CT_String),
    ("w:latentStyles", CT_LatentStyles),
    ("w:locked", CT_OnOff),
    ("w:lsdException", CT_LsdException),
    ("w:name", CT_String),
    ("w:next", CT_String),
    ("w:qFormat", CT_OnOff),
    ("w:semiHidden", CT_OnOff),
    ("w:style", CT_Style),
    ("w:styles", CT_Styles),
    ("w:uiPriority", CT_DecimalNumber),
    ("w:unhideWhenUsed", CT_OnOff),
):
    register_element_cls(_nsptag, _element_cls)
del _nsptag, _element_cls  # -- keep the loop variables out of the module namespace
|
||||
|
||||
from .table import (
|
||||
CT_Height,
|
||||
CT_Row,
|
||||
CT_Tbl,
|
||||
CT_TblGrid,
|
||||
CT_TblGridCol,
|
||||
CT_TblLayoutType,
|
||||
CT_TblPr,
|
||||
CT_TblPrEx,
|
||||
CT_TblWidth,
|
||||
CT_Tc,
|
||||
CT_TcPr,
|
||||
CT_TrPr,
|
||||
CT_VMerge,
|
||||
CT_VerticalJc,
|
||||
)
|
||||
|
||||
# -- table-related elements, registered in the order listed --
for _nsptag, _element_cls in (
    ("w:bidiVisual", CT_OnOff),
    ("w:gridAfter", CT_DecimalNumber),
    ("w:gridBefore", CT_DecimalNumber),
    ("w:gridCol", CT_TblGridCol),
    ("w:gridSpan", CT_DecimalNumber),
    ("w:tbl", CT_Tbl),
    ("w:tblGrid", CT_TblGrid),
    ("w:tblLayout", CT_TblLayoutType),
    ("w:tblPr", CT_TblPr),
    ("w:tblPrEx", CT_TblPrEx),
    ("w:tblStyle", CT_String),
    ("w:tc", CT_Tc),
    ("w:tcPr", CT_TcPr),
    ("w:tcW", CT_TblWidth),
    ("w:tr", CT_Row),
    ("w:trHeight", CT_Height),
    ("w:trPr", CT_TrPr),
    ("w:vAlign", CT_VerticalJc),
    ("w:vMerge", CT_VMerge),
):
    register_element_cls(_nsptag, _element_cls)
del _nsptag, _element_cls  # -- keep the loop variables out of the module namespace
|
||||
|
||||
from .text.font import (
|
||||
CT_Color,
|
||||
CT_Fonts,
|
||||
CT_Highlight,
|
||||
CT_HpsMeasure,
|
||||
CT_RPr,
|
||||
CT_Underline,
|
||||
CT_VerticalAlignRun,
|
||||
)
|
||||
|
||||
# -- run-property (font) tags paired with their element classes; each pair is passed
# -- to `register_element_cls()` in the order listed
for _font_tag, _font_cls in (
    ("w:b", CT_OnOff),
    ("w:bCs", CT_OnOff),
    ("w:caps", CT_OnOff),
    ("w:color", CT_Color),
    ("w:cs", CT_OnOff),
    ("w:dstrike", CT_OnOff),
    ("w:emboss", CT_OnOff),
    ("w:highlight", CT_Highlight),
    ("w:i", CT_OnOff),
    ("w:iCs", CT_OnOff),
    ("w:imprint", CT_OnOff),
    ("w:noProof", CT_OnOff),
    ("w:oMath", CT_OnOff),
    ("w:outline", CT_OnOff),
    ("w:rFonts", CT_Fonts),
    ("w:rPr", CT_RPr),
    ("w:rStyle", CT_String),
    ("w:rtl", CT_OnOff),
    ("w:shadow", CT_OnOff),
    ("w:smallCaps", CT_OnOff),
    ("w:snapToGrid", CT_OnOff),
    ("w:specVanish", CT_OnOff),
    ("w:strike", CT_OnOff),
    ("w:sz", CT_HpsMeasure),
    ("w:u", CT_Underline),
    ("w:vanish", CT_OnOff),
    ("w:vertAlign", CT_VerticalAlignRun),
    ("w:webHidden", CT_OnOff),
):
    register_element_cls(_font_tag, _font_cls)
# -- remove the loop variables so no extra names are left in the module namespace --
del _font_tag, _font_cls
|
||||
|
||||
from .text.paragraph import CT_P
|
||||
|
||||
# -- pass the `w:p` tag and the `CT_P` class imported from `.text.paragraph` to
# -- `register_element_cls()`
register_element_cls("w:p", CT_P)
|
||||
|
||||
from .text.parfmt import (
|
||||
CT_Ind,
|
||||
CT_Jc,
|
||||
CT_PPr,
|
||||
CT_Spacing,
|
||||
CT_TabStop,
|
||||
CT_TabStops,
|
||||
)
|
||||
|
||||
# -- paragraph-format tags paired with their element classes; each pair is passed to
# -- `register_element_cls()` in the order listed
for _parfmt_tag, _parfmt_cls in (
    ("w:ind", CT_Ind),
    ("w:jc", CT_Jc),
    ("w:keepLines", CT_OnOff),
    ("w:keepNext", CT_OnOff),
    ("w:outlineLvl", CT_DecimalNumber),
    ("w:pageBreakBefore", CT_OnOff),
    ("w:pPr", CT_PPr),
    ("w:pStyle", CT_String),
    ("w:spacing", CT_Spacing),
    ("w:tab", CT_TabStop),
    ("w:tabs", CT_TabStops),
    ("w:widowControl", CT_OnOff),
):
    register_element_cls(_parfmt_tag, _parfmt_cls)
# -- remove the loop variables so no extra names are left in the module namespace --
del _parfmt_tag, _parfmt_cls
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user