Initial commit (Clean history)

This commit is contained in:
anhduy-tech
2025-12-30 11:27:14 +07:00
commit ef48c93de0
19255 changed files with 3248867 additions and 0 deletions

View File

@@ -0,0 +1,51 @@
"""Contains comments added to the document."""
from __future__ import annotations
import os
from typing import TYPE_CHECKING, cast
from typing_extensions import Self
from docx.comments import Comments
from docx.opc.constants import CONTENT_TYPE as CT
from docx.opc.packuri import PackURI
from docx.oxml.comments import CT_Comments
from docx.oxml.parser import parse_xml
from docx.package import Package
from docx.parts.story import StoryPart
if TYPE_CHECKING:
from docx.oxml.comments import CT_Comments
from docx.package import Package
class CommentsPart(StoryPart):
    """Package part that stores the comments added to a document.

    Wraps the `w:comments` root element and exposes it through a |Comments| proxy.
    """

    def __init__(
        self, partname: PackURI, content_type: str, element: CT_Comments, package: Package
    ):
        super().__init__(partname, content_type, element, package)
        # Keep a typed reference to the root element for use by `.comments`.
        self._comments = element

    @property
    def comments(self) -> Comments:
        """|Comments| proxy wrapping the `w:comments` root element of this part.

        A new proxy object is constructed on every access.
        """
        root = self._comments
        return Comments(root, self)

    @classmethod
    def default(cls, package: Package) -> Self:
        """Create a comments part from the bundled default `w:comments` template.

        The new part is named `/word/comments.xml` and belongs to `package`.
        """
        return cls(
            PackURI("/word/comments.xml"),
            CT.WML_COMMENTS,
            cast("CT_Comments", parse_xml(cls._default_comments_xml())),
            package,
        )

    @classmethod
    def _default_comments_xml(cls) -> bytes:
        """Raw bytes of the `default-comments.xml` template file."""
        # The template lives in the sibling `templates` directory of this package.
        module_dir = os.path.split(__file__)[0]
        template_path = os.path.join(module_dir, "..", "templates", "default-comments.xml")
        with open(template_path, "rb") as template:
            return template.read()

View File

@@ -0,0 +1,169 @@
"""|DocumentPart| and closely related objects."""
from __future__ import annotations
from typing import IO, TYPE_CHECKING, cast
from docx.document import Document
from docx.opc.constants import RELATIONSHIP_TYPE as RT
from docx.parts.comments import CommentsPart
from docx.parts.hdrftr import FooterPart, HeaderPart
from docx.parts.numbering import NumberingPart
from docx.parts.settings import SettingsPart
from docx.parts.story import StoryPart
from docx.parts.styles import StylesPart
from docx.shape import InlineShapes
from docx.shared import lazyproperty
if TYPE_CHECKING:
from docx.comments import Comments
from docx.enum.style import WD_STYLE_TYPE
from docx.opc.coreprops import CoreProperties
from docx.settings import Settings
from docx.styles.style import BaseStyle
class DocumentPart(StoryPart):
    """Main document part of a WordprocessingML (WML) package, i.e. a .docx file.

    Brokers access to sibling parts (header, footer, comments, numbering, settings
    and styles) and to package-level services such as core properties and saving.
    Content objects deep in the document reach this part through their inherited
    `Parented.part` property when they need a service owned by a remote ancestor.
    """

    def add_footer_part(self):
        """Create a footer part, relate it to this part, return `(footer_part, rId)`."""
        new_footer = FooterPart.new(self.package)
        return new_footer, self.relate_to(new_footer, RT.FOOTER)

    def add_header_part(self):
        """Create a header part, relate it to this part, return `(header_part, rId)`."""
        new_header = HeaderPart.new(self.package)
        return new_header, self.relate_to(new_header, RT.HEADER)

    @property
    def comments(self) -> Comments:
        """|Comments| object for this document, obtained from its comments part."""
        comments_part = self._comments_part
        return comments_part.comments

    @property
    def core_properties(self) -> CoreProperties:
        """|CoreProperties| object of the package, giving read/write access to the
        core properties of this document."""
        package = self.package
        return package.core_properties

    @property
    def document(self):
        """A |Document| proxy built around the root element of this part."""
        root = self._element
        return Document(root, self)

    def drop_header_part(self, rId: str) -> None:
        """Drop the relationship `rId`, which identifies a related header part."""
        self.drop_rel(rId)

    def footer_part(self, rId: str):
        """|FooterPart| that this part is related to by `rId`."""
        related = self.related_parts
        return related[rId]

    def get_style(self, style_id: str | None, style_type: WD_STYLE_TYPE) -> BaseStyle:
        """Look up the style with `style_id` in this document's styles.

        The default style for `style_type` is returned when `style_id` is |None| or
        does not match a defined style of `style_type`.
        """
        styles = self.styles
        return styles.get_by_id(style_id, style_type)

    def get_style_id(self, style_or_name, style_type):
        """Resolve `style_or_name` to the style_id (|str|) of a `style_type` style.

        |None| is returned when the style resolves to the default style for
        `style_type` or when `style_or_name` is itself |None|. Raises if
        `style_or_name` is a style of the wrong type or names a style not present in
        the document.
        """
        styles = self.styles
        return styles.get_style_id(style_or_name, style_type)

    def header_part(self, rId: str):
        """|HeaderPart| that this part is related to by `rId`."""
        related = self.related_parts
        return related[rId]

    @lazyproperty
    def inline_shapes(self):
        """|InlineShapes| collection for the inline shapes in the document body."""
        body = self._element.body
        return InlineShapes(body, self)

    @lazyproperty
    def numbering_part(self) -> NumberingPart:
        """|NumberingPart| giving access to this document's numbering definitions.

        When no numbering part is related yet, one is requested from
        `NumberingPart.new()` and related to this part.
        """
        try:
            existing = self.part_related_by(RT.NUMBERING)
        except KeyError:
            created = NumberingPart.new()
            self.relate_to(created, RT.NUMBERING)
            return created
        return cast(NumberingPart, existing)

    def save(self, path_or_stream: str | IO[bytes]):
        """Write the whole package to `path_or_stream`.

        `path_or_stream` is either a filesystem path (a string) or a file-like
        object.
        """
        package = self.package
        package.save(path_or_stream)

    @property
    def settings(self) -> Settings:
        """|Settings| object exposing the contents of this document's settings part."""
        settings_part = self._settings_part
        return settings_part.settings

    @property
    def styles(self):
        """|Styles| object exposing the contents of this document's styles part."""
        styles_part = self._styles_part
        return styles_part.styles

    @property
    def _comments_part(self) -> CommentsPart:
        """|CommentsPart| holding the comments added to this document.

        A default comments part is created and related when none is present.
        """
        try:
            existing = self.part_related_by(RT.COMMENTS)
        except KeyError:
            assert self.package is not None
            created = CommentsPart.default(self.package)
            self.relate_to(created, RT.COMMENTS)
            return created
        return cast(CommentsPart, existing)

    @property
    def _settings_part(self) -> SettingsPart:
        """|SettingsPart| holding the document-level settings for this document.

        A default settings part is created and related when none is present.
        """
        try:
            existing = self.part_related_by(RT.SETTINGS)
        except KeyError:
            created = SettingsPart.default(self.package)
            self.relate_to(created, RT.SETTINGS)
            return created
        return cast(SettingsPart, existing)

    @property
    def _styles_part(self) -> StylesPart:
        """|StylesPart| instance for this document.

        A default styles part is created and related when none is present.
        """
        try:
            existing = self.part_related_by(RT.STYLES)
        except KeyError:
            owning_package = self.package
            assert owning_package is not None
            created = StylesPart.default(owning_package)
            self.relate_to(created, RT.STYLES)
            return created
        return cast(StylesPart, existing)

View File

@@ -0,0 +1,53 @@
"""Header and footer part objects."""
from __future__ import annotations
import os
from typing import TYPE_CHECKING
from docx.opc.constants import CONTENT_TYPE as CT
from docx.oxml.parser import parse_xml
from docx.parts.story import StoryPart
if TYPE_CHECKING:
from docx.package import Package
class FooterPart(StoryPart):
    """Part holding the definition of a section footer."""

    @classmethod
    def new(cls, package: Package):
        """Create a footer part in `package` from the bundled default template.

        The partname is the next one available matching `/word/footer%d.xml`.
        """
        return cls(
            package.next_partname("/word/footer%d.xml"),
            CT.WML_FOOTER,
            parse_xml(cls._default_footer_xml()),
            package,
        )

    @classmethod
    def _default_footer_xml(cls):
        """Raw bytes of the `default-footer.xml` template file."""
        # The template lives in the sibling `templates` directory of this package.
        module_dir = os.path.split(__file__)[0]
        template_path = os.path.join(module_dir, "..", "templates", "default-footer.xml")
        with open(template_path, "rb") as template:
            return template.read()
class HeaderPart(StoryPart):
    """Part holding the definition of a section header."""

    @classmethod
    def new(cls, package: Package):
        """Create a header part in `package` from the bundled default template.

        The partname is the next one available matching `/word/header%d.xml`.
        """
        return cls(
            package.next_partname("/word/header%d.xml"),
            CT.WML_HEADER,
            parse_xml(cls._default_header_xml()),
            package,
        )

    @classmethod
    def _default_header_xml(cls):
        """Raw bytes of the `default-header.xml` template file."""
        # The template lives in the sibling `templates` directory of this package.
        module_dir = os.path.split(__file__)[0]
        template_path = os.path.join(module_dir, "..", "templates", "default-header.xml")
        with open(template_path, "rb") as template:
            return template.read()

View File

@@ -0,0 +1,80 @@
"""The proxy class for an image part, and related objects."""
from __future__ import annotations
import hashlib
from typing import TYPE_CHECKING
from docx.image.image import Image
from docx.opc.part import Part
from docx.shared import Emu, Inches
if TYPE_CHECKING:
from docx.opc.package import OpcPackage
from docx.opc.packuri import PackURI
class ImagePart(Part):
    """An image part.

    Corresponds to the target part of a relationship with type RELATIONSHIP_TYPE.IMAGE.
    """

    def __init__(
        self, partname: PackURI, content_type: str, blob: bytes, image: Image | None = None
    ):
        super().__init__(partname, content_type, blob)
        # `image` is None when the part is loaded from a package; it is then parsed
        # lazily from the blob on first access to the `image` property.
        self._image = image

    @property
    def default_cx(self):
        """Native width of this image, calculated from its width in pixels and
        horizontal dots per inch (dpi)."""
        px_width = self.image.px_width
        horz_dpi = self.image.horz_dpi
        width_in_inches = px_width / horz_dpi
        return Inches(width_in_inches)

    @property
    def default_cy(self):
        """Native height of this image, calculated from its height in pixels and
        vertical dots per inch (dpi)."""
        px_height = self.image.px_height
        # Height must be scaled by the *vertical* resolution; using the horizontal
        # dpi gives a wrong height for images whose two resolutions differ.
        vert_dpi = self.image.vert_dpi
        height_in_emu = int(round(914400 * px_height / vert_dpi))
        return Emu(height_in_emu)

    @property
    def filename(self):
        """Filename from which this image part was originally created.

        When no |Image| object was supplied at construction (as when the part is
        loaded from a package), a generic name of the form 'image.<ext>' is returned
        instead, where `<ext>` is the extension of this part's partname.
        """
        if self._image is not None:
            return self._image.filename
        return "image.%s" % self.partname.ext

    @classmethod
    def from_image(cls, image: Image, partname: PackURI):
        """Return an |ImagePart| instance newly created from `image` and assigned
        `partname`."""
        return ImagePart(partname, image.content_type, image.blob, image)

    @property
    def image(self) -> Image:
        """|Image| object for this part, parsed from the blob on first access when
        none was provided at construction."""
        if self._image is None:
            self._image = Image.from_blob(self.blob)
        return self._image

    @classmethod
    def load(cls, partname: PackURI, content_type: str, blob: bytes, package: OpcPackage):
        """Called by ``docx.opc.package.PartFactory`` to load an image part from a
        package being opened by ``Document(...)`` call.

        `package` is accepted to satisfy the loader signature but is not used here.
        """
        return cls(partname, content_type, blob)

    @property
    def sha1(self):
        """SHA1 hash digest (hex string) of the blob of this image part."""
        return hashlib.sha1(self.blob).hexdigest()

View File

@@ -0,0 +1,32 @@
"""|NumberingPart| and closely related objects."""
from ..opc.part import XmlPart
from ..shared import lazyproperty
class NumberingPart(XmlPart):
    """Proxy for the numbering.xml part, which holds the numbering definitions of a
    document or glossary."""

    @classmethod
    def new(cls) -> "NumberingPart":
        """Newly created numbering part containing only the root ``<w:numbering>``
        element.

        Not implemented: calling this always raises |NotImplementedError|.
        """
        raise NotImplementedError

    @lazyproperty
    def numbering_definitions(self):
        """|_NumberingDefinitions| collection wrapping the root element of this part,
        giving access to its numbering definitions (<w:num> element proxies)."""
        root = self._element
        return _NumberingDefinitions(root)
class _NumberingDefinitions:
    """Collection of |_NumberingDefinition| instances, one for each ``<w:num>``
    element in a numbering part."""

    def __init__(self, numbering_elm):
        super().__init__()
        # Root numbering element; its `num_lst` attribute backs `len()`.
        self._numbering = numbering_elm

    def __len__(self):
        """Number of items in the `num_lst` of the wrapped numbering element."""
        num_elements = self._numbering.num_lst
        return len(num_elements)

View File

@@ -0,0 +1,50 @@
"""|SettingsPart| and closely related objects."""
from __future__ import annotations
import os
from typing import TYPE_CHECKING, cast
from docx.opc.constants import CONTENT_TYPE as CT
from docx.opc.packuri import PackURI
from docx.opc.part import XmlPart
from docx.oxml.parser import parse_xml
from docx.settings import Settings
if TYPE_CHECKING:
from docx.oxml.settings import CT_Settings
from docx.package import Package
class SettingsPart(XmlPart):
    """Part holding the document-level settings of a WordprocessingML (WML) package."""

    def __init__(
        self, partname: PackURI, content_type: str, element: CT_Settings, package: Package
    ):
        super().__init__(partname, content_type, element, package)
        # Keep a typed reference to the root element for use by `.settings`.
        self._settings = element

    @classmethod
    def default(cls, package: Package):
        """Create a settings part from the bundled default `w:settings` template.

        The new part is named `/word/settings.xml` and belongs to `package`.
        """
        return cls(
            PackURI("/word/settings.xml"),
            CT.WML_SETTINGS,
            cast("CT_Settings", parse_xml(cls._default_settings_xml())),
            package,
        )

    @property
    def settings(self) -> Settings:
        """|Settings| proxy wrapping the `w:settings` element of this part.

        Provides access to the document-level settings for this document. A new
        proxy object is constructed on every access.
        """
        root = self._settings
        return Settings(root)

    @classmethod
    def _default_settings_xml(cls):
        """Raw bytes of the `default-settings.xml` template file."""
        # The template lives in the sibling `templates` directory of this package.
        module_dir = os.path.split(__file__)[0]
        template_path = os.path.join(module_dir, "..", "templates", "default-settings.xml")
        with open(template_path, "rb") as template:
            return template.read()

View File

@@ -0,0 +1,95 @@
"""|StoryPart| and related objects."""
from __future__ import annotations
from typing import IO, TYPE_CHECKING, Tuple, cast
from docx.opc.constants import RELATIONSHIP_TYPE as RT
from docx.opc.part import XmlPart
from docx.oxml.shape import CT_Inline
from docx.shared import Length, lazyproperty
if TYPE_CHECKING:
from docx.enum.style import WD_STYLE_TYPE
from docx.image.image import Image
from docx.parts.document import DocumentPart
from docx.styles.style import BaseStyle
class StoryPart(XmlPart):
    """Base class for story parts.

    A story part is one that can contain textual content, such as the document-part and
    header or footer parts. These all share content behaviors like `.paragraphs`,
    `.add_paragraph()`, `.add_table()` etc.
    """

    def get_or_add_image(self, image_descriptor: str | IO[bytes]) -> Tuple[str, Image]:
        """Return (rId, image) pair for image identified by `image_descriptor`.

        `rId` is the str key (often like "rId7") for the relationship between this story
        part and the image part, reused if already present, newly created if not.
        `image` is an |Image| instance providing access to the properties of the image,
        such as dimensions and image type.
        """
        package = self._package
        assert package is not None
        image_part = package.get_or_add_image_part(image_descriptor)
        rId = self.relate_to(image_part, RT.IMAGE)
        return rId, image_part.image

    def get_style(self, style_id: str | None, style_type: WD_STYLE_TYPE) -> BaseStyle:
        """Return the style in this document matching `style_id`.

        Returns the default style for `style_type` if `style_id` is |None| or does not
        match a defined style of `style_type`. The lookup is delegated to the main
        document part.
        """
        return self._document_part.get_style(style_id, style_type)

    def get_style_id(
        self, style_or_name: BaseStyle | str | None, style_type: WD_STYLE_TYPE
    ) -> str | None:
        """Return str style_id for `style_or_name` of `style_type`.

        Returns |None| if the style resolves to the default style for `style_type` or if
        `style_or_name` is itself |None|. Raises if `style_or_name` is a style of the
        wrong type or names a style not present in the document. The lookup is
        delegated to the main document part.
        """
        return self._document_part.get_style_id(style_or_name, style_type)

    def new_pic_inline(
        self,
        image_descriptor: str | IO[bytes],
        width: int | Length | None = None,
        height: int | Length | None = None,
    ) -> CT_Inline:
        """Return a newly-created `w:inline` element.

        The element contains the image specified by `image_descriptor` and is scaled
        based on the values of `width` and `height`.
        """
        rId, image = self.get_or_add_image(image_descriptor)
        cx, cy = image.scaled_dimensions(width, height)
        shape_id, filename = self.next_id, image.filename
        return CT_Inline.new_pic_inline(shape_id, rId, filename, cx, cy)

    @property
    def next_id(self) -> int:
        """Next available positive integer id value in this story XML document.

        The value is determined by incrementing the maximum existing id value. Gaps in
        the existing id sequence are not filled. The id attribute value is unique in the
        document, without regard to the element type it appears on. Returns 1 when no
        numeric id is present; non-numeric id values are ignored.
        """
        id_str_lst = self._element.xpath("//@id")
        # `str.isdecimal()` rather than `str.isdigit()`: the latter also accepts
        # characters such as superscript digits, which `int()` rejects with ValueError.
        used_ids = [int(id_str) for id_str in id_str_lst if id_str.isdecimal()]
        return max(used_ids, default=0) + 1

    @lazyproperty
    def _document_part(self) -> DocumentPart:
        """|DocumentPart| object for this package."""
        package = self.package
        assert package is not None
        return cast("DocumentPart", package.main_document_part)

View File

@@ -0,0 +1,42 @@
"""Provides StylesPart and related objects."""
from __future__ import annotations
import os
from typing import TYPE_CHECKING
from docx.opc.constants import CONTENT_TYPE as CT
from docx.opc.packuri import PackURI
from docx.opc.part import XmlPart
from docx.oxml.parser import parse_xml
from docx.styles.styles import Styles
if TYPE_CHECKING:
from docx.opc.package import OpcPackage
class StylesPart(XmlPart):
    """Proxy for the styles.xml part, which holds the style definitions of a document
    or glossary."""

    @classmethod
    def default(cls, package: OpcPackage) -> StylesPart:
        """Create a styles part from the bundled default styles template.

        The new part is named `/word/styles.xml` and belongs to `package`.
        """
        return cls(
            PackURI("/word/styles.xml"),
            CT.WML_STYLES,
            parse_xml(cls._default_styles_xml()),
            package,
        )

    @property
    def styles(self):
        """|Styles| instance wrapping the root element of this part, giving access to
        its styles (<w:style> element proxies).

        A new proxy object is constructed on every access.
        """
        root = self.element
        return Styles(root)

    @classmethod
    def _default_styles_xml(cls):
        """Raw bytes of the `default-styles.xml` template file."""
        # The template lives in the sibling `templates` directory of this package.
        module_dir = os.path.split(__file__)[0]
        template_path = os.path.join(module_dir, "..", "templates", "default-styles.xml")
        with open(template_path, "rb") as template:
            return template.read()