Initial commit (Clean history)

This commit is contained in:
anhduy-tech
2025-12-30 11:27:14 +07:00
commit ef48c93de0
19255 changed files with 3248867 additions and 0 deletions

View File

@@ -0,0 +1,306 @@
"""Constant values related to the Open Packaging Convention.
In particular it includes content types and relationship types.
"""
class CONTENT_TYPE:
    """Content type URIs (like MIME-types) that specify a part's format.

    A plain namespace of string constants, kept in alphabetical order by name. The
    name prefix mirrors the family that appears in the value (e.g. ``PML_*`` values
    all contain ``presentationml``).
    """

    BMP = "image/bmp"
    # -- DML_*: values in the `officedocument.drawingml` family --
    DML_CHART = "application/vnd.openxmlformats-officedocument.drawingml.chart+xml"
    DML_CHARTSHAPES = "application/vnd.openxmlformats-officedocument.drawingml.chartshapes+xml"
    DML_DIAGRAM_COLORS = "application/vnd.openxmlformats-officedocument.drawingml.diagramColors+xml"
    DML_DIAGRAM_DATA = "application/vnd.openxmlformats-officedocument.drawingml.diagramData+xml"
    DML_DIAGRAM_LAYOUT = "application/vnd.openxmlformats-officedocument.drawingml.diagramLayout+xml"
    DML_DIAGRAM_STYLE = "application/vnd.openxmlformats-officedocument.drawingml.diagramStyle+xml"
    GIF = "image/gif"
    JPEG = "image/jpeg"
    MS_PHOTO = "image/vnd.ms-photo"
    # -- OFC_*: values directly under `officedocument` --
    OFC_CUSTOM_PROPERTIES = "application/vnd.openxmlformats-officedocument.custom-properties+xml"
    OFC_CUSTOM_XML_PROPERTIES = (
        "application/vnd.openxmlformats-officedocument.customXmlProperties+xml"
    )
    OFC_DRAWING = "application/vnd.openxmlformats-officedocument.drawing+xml"
    OFC_EXTENDED_PROPERTIES = (
        "application/vnd.openxmlformats-officedocument.extended-properties+xml"
    )
    OFC_OLE_OBJECT = "application/vnd.openxmlformats-officedocument.oleObject"
    OFC_PACKAGE = "application/vnd.openxmlformats-officedocument.package"
    OFC_THEME = "application/vnd.openxmlformats-officedocument.theme+xml"
    OFC_THEME_OVERRIDE = "application/vnd.openxmlformats-officedocument.themeOverride+xml"
    OFC_VML_DRAWING = "application/vnd.openxmlformats-officedocument.vmlDrawing"
    # -- OPC_*: values in the `openxmlformats-package` family --
    OPC_CORE_PROPERTIES = "application/vnd.openxmlformats-package.core-properties+xml"
    OPC_DIGITAL_SIGNATURE_CERTIFICATE = (
        "application/vnd.openxmlformats-package.digital-signature-certificate"
    )
    OPC_DIGITAL_SIGNATURE_ORIGIN = "application/vnd.openxmlformats-package.digital-signature-origin"
    OPC_DIGITAL_SIGNATURE_XMLSIGNATURE = (
        "application/vnd.openxmlformats-package.digital-signature-xmlsignature+xml"
    )
    OPC_RELATIONSHIPS = "application/vnd.openxmlformats-package.relationships+xml"
    # -- PML_*: values in the `officedocument.presentationml` family --
    PML_COMMENTS = "application/vnd.openxmlformats-officedocument.presentationml.comments+xml"
    PML_COMMENT_AUTHORS = (
        "application/vnd.openxmlformats-officedocument.presentationml.commentAuthors+xml"
    )
    PML_HANDOUT_MASTER = (
        "application/vnd.openxmlformats-officedocument.presentationml.handoutMaster+xml"
    )
    PML_NOTES_MASTER = (
        "application/vnd.openxmlformats-officedocument.presentationml.notesMaster+xml"
    )
    PML_NOTES_SLIDE = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml"
    PML_PRESENTATION_MAIN = (
        "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml"
    )
    PML_PRES_PROPS = "application/vnd.openxmlformats-officedocument.presentationml.presProps+xml"
    PML_PRINTER_SETTINGS = (
        "application/vnd.openxmlformats-officedocument.presentationml.printerSettings"
    )
    PML_SLIDE = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml"
    PML_SLIDESHOW_MAIN = (
        "application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml"
    )
    PML_SLIDE_LAYOUT = (
        "application/vnd.openxmlformats-officedocument.presentationml.slideLayout+xml"
    )
    PML_SLIDE_MASTER = (
        "application/vnd.openxmlformats-officedocument.presentationml.slideMaster+xml"
    )
    PML_SLIDE_UPDATE_INFO = (
        "application/vnd.openxmlformats-officedocument.presentationml.slideUpdateInfo+xml"
    )
    PML_TABLE_STYLES = (
        "application/vnd.openxmlformats-officedocument.presentationml.tableStyles+xml"
    )
    PML_TAGS = "application/vnd.openxmlformats-officedocument.presentationml.tags+xml"
    PML_TEMPLATE_MAIN = (
        "application/vnd.openxmlformats-officedocument.presentationml.template.main+xml"
    )
    PML_VIEW_PROPS = "application/vnd.openxmlformats-officedocument.presentationml.viewProps+xml"
    PNG = "image/png"
    # -- SML_*: values in the `officedocument.spreadsheetml` family --
    SML_CALC_CHAIN = "application/vnd.openxmlformats-officedocument.spreadsheetml.calcChain+xml"
    SML_CHARTSHEET = "application/vnd.openxmlformats-officedocument.spreadsheetml.chartsheet+xml"
    SML_COMMENTS = "application/vnd.openxmlformats-officedocument.spreadsheetml.comments+xml"
    SML_CONNECTIONS = "application/vnd.openxmlformats-officedocument.spreadsheetml.connections+xml"
    SML_CUSTOM_PROPERTY = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.customProperty"
    )
    SML_DIALOGSHEET = "application/vnd.openxmlformats-officedocument.spreadsheetml.dialogsheet+xml"
    SML_EXTERNAL_LINK = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.externalLink+xml"
    )
    SML_PIVOT_CACHE_DEFINITION = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCacheDefinition+xml"
    )
    SML_PIVOT_CACHE_RECORDS = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.pivotCacheRecords+xml"
    )
    SML_PIVOT_TABLE = "application/vnd.openxmlformats-officedocument.spreadsheetml.pivotTable+xml"
    SML_PRINTER_SETTINGS = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.printerSettings"
    )
    SML_QUERY_TABLE = "application/vnd.openxmlformats-officedocument.spreadsheetml.queryTable+xml"
    SML_REVISION_HEADERS = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.revisionHeaders+xml"
    )
    SML_REVISION_LOG = "application/vnd.openxmlformats-officedocument.spreadsheetml.revisionLog+xml"
    SML_SHARED_STRINGS = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml"
    )
    SML_SHEET = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    SML_SHEET_MAIN = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"
    SML_SHEET_METADATA = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheetMetadata+xml"
    )
    SML_STYLES = "application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml"
    SML_TABLE = "application/vnd.openxmlformats-officedocument.spreadsheetml.table+xml"
    SML_TABLE_SINGLE_CELLS = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.tableSingleCells+xml"
    )
    SML_TEMPLATE_MAIN = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml"
    )
    SML_USER_NAMES = "application/vnd.openxmlformats-officedocument.spreadsheetml.userNames+xml"
    SML_VOLATILE_DEPENDENCIES = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.volatileDependencies+xml"
    )
    SML_WORKSHEET = "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"
    TIFF = "image/tiff"
    # -- WML_*: values in the `officedocument.wordprocessingml` family --
    WML_COMMENTS = "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"
    WML_DOCUMENT = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    WML_DOCUMENT_GLOSSARY = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document.glossary+xml"
    )
    WML_DOCUMENT_MAIN = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"
    )
    WML_ENDNOTES = "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml"
    WML_FONT_TABLE = "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml"
    WML_FOOTER = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"
    WML_FOOTNOTES = "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml"
    WML_HEADER = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"
    WML_NUMBERING = "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml"
    WML_PRINTER_SETTINGS = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.printerSettings"
    )
    WML_SETTINGS = "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml"
    WML_STYLES = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"
    WML_WEB_SETTINGS = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml"
    )
    # -- generic XML and `x-` prefixed types --
    XML = "application/xml"
    X_EMF = "image/x-emf"
    X_FONTDATA = "application/x-fontdata"
    X_FONT_TTF = "application/x-font-ttf"
    X_WMF = "image/x-wmf"
class NAMESPACE:
    """XML namespace URIs referenced when reading and writing OPC package parts."""

    DML_WORDPROCESSING_DRAWING = "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
    OFC_RELATIONSHIPS = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
    )
    OPC_RELATIONSHIPS = (
        "http://schemas.openxmlformats.org/package/2006/relationships"
    )
    OPC_CONTENT_TYPES = (
        "http://schemas.openxmlformats.org/package/2006/content-types"
    )
    WML_MAIN = (
        "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
    )
class RELATIONSHIP_TARGET_MODE:
    """The two values a relationship's target mode can take."""

    INTERNAL = "Internal"
    EXTERNAL = "External"
class RELATIONSHIP_TYPE:
    """Relationship type URIs, the value carried in a relationship's ``Type`` attribute.

    A plain namespace of string constants, kept in alphabetical order by name. Most
    values live under ``.../officeDocument/2006/relationships/``; the digital-signature
    and metadata entries live under ``.../package/2006/relationships/``.
    """

    AUDIO = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/audio"
    A_F_CHUNK = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/aFChunk"
    CALC_CHAIN = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/calcChain"
    CERTIFICATE = (
        "http://schemas.openxmlformats.org/package/2006/relationships/digital-signature/certificate"
    )
    CHART = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/chart"
    CHARTSHEET = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/chartsheet"
    CHART_USER_SHAPES = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/chartUserShapes"
    )
    COMMENTS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments"
    COMMENT_AUTHORS = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/commentAuthors"
    )
    CONNECTIONS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/connections"
    CONTROL = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/control"
    CORE_PROPERTIES = (
        "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"
    )
    CUSTOM_PROPERTIES = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"
    )
    CUSTOM_PROPERTY = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/customProperty"
    )
    CUSTOM_XML = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/customXml"
    CUSTOM_XML_PROPS = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/customXmlProps"
    )
    DIAGRAM_COLORS = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/diagramColors"
    )
    DIAGRAM_DATA = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/diagramData"
    DIAGRAM_LAYOUT = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/diagramLayout"
    )
    DIAGRAM_QUICK_STYLE = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/diagramQuickStyle"
    )
    DIALOGSHEET = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/dialogsheet"
    DRAWING = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/drawing"
    ENDNOTES = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes"
    EXTENDED_PROPERTIES = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties"
    )
    EXTERNAL_LINK = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/externalLink"
    )
    FONT = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/font"
    FONT_TABLE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable"
    FOOTER = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer"
    FOOTNOTES = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes"
    GLOSSARY_DOCUMENT = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/glossaryDocument"
    )
    HANDOUT_MASTER = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/handoutMaster"
    )
    HEADER = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header"
    HYPERLINK = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
    IMAGE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"
    NOTES_MASTER = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesMaster"
    NOTES_SLIDE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide"
    NUMBERING = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering"
    OFFICE_DOCUMENT = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"
    )
    OLE_OBJECT = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject"
    ORIGIN = "http://schemas.openxmlformats.org/package/2006/relationships/digital-signature/origin"
    PACKAGE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/package"
    PIVOT_CACHE_DEFINITION = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotCacheDefinition"
    )
    # NOTE(review): this is the only officeDocument entry in this class with an extra
    # `/spreadsheetml` path segment (compare PIVOT_CACHE_DEFINITION above) -- confirm the
    # value against the OOXML specification before relying on it.
    PIVOT_CACHE_RECORDS = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
        "/spreadsheetml/pivotCacheRecords"
    )
    PIVOT_TABLE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/pivotTable"
    PRES_PROPS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/presProps"
    PRINTER_SETTINGS = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/printerSettings"
    )
    QUERY_TABLE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/queryTable"
    REVISION_HEADERS = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/revisionHeaders"
    )
    REVISION_LOG = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/revisionLog"
    SETTINGS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings"
    SHARED_STRINGS = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings"
    )
    SHEET_METADATA = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sheetMetadata"
    )
    SIGNATURE = (
        "http://schemas.openxmlformats.org/package/2006/relationships/digital-signature/signature"
    )
    SLIDE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide"
    SLIDE_LAYOUT = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout"
    SLIDE_MASTER = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideMaster"
    SLIDE_UPDATE_INFO = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideUpdateInfo"
    )
    STYLES = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"
    TABLE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/table"
    TABLE_SINGLE_CELLS = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/tableSingleCells"
    )
    TABLE_STYLES = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/tableStyles"
    TAGS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/tags"
    THEME = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme"
    THEME_OVERRIDE = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/themeOverride"
    )
    THUMBNAIL = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/thumbnail"
    USERNAMES = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/usernames"
    VIDEO = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/video"
    VIEW_PROPS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/viewProps"
    VML_DRAWING = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/vmlDrawing"
    VOLATILE_DEPENDENCIES = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/volatileDependencies"
    )
    WEB_SETTINGS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings"
    WORKSHEET_SOURCE = (
        "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheetSource"
    )
    XML_MAPS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/xmlMaps"

View File

@@ -0,0 +1,142 @@
"""Provides CoreProperties, Dublin-Core attributes of the document.
These are broadly-standardized attributes like author, last-modified, etc.
"""
from __future__ import annotations
import datetime as dt
from typing import TYPE_CHECKING
from docx.oxml.coreprops import CT_CoreProperties
if TYPE_CHECKING:
from docx.oxml.coreprops import CT_CoreProperties
class CoreProperties:
    """Corresponds to part named ``/docProps/core.xml``, containing the core document
    properties for this document package.

    Every property here is a thin read/write proxy: the getter returns, and the setter
    assigns, the like-named attribute on the wrapped `CT_CoreProperties` element. No
    validation or conversion happens in this class.
    """

    def __init__(self, element: CT_CoreProperties):
        # the XML element object all properties below delegate to
        self._element = element

    @property
    def author(self):
        """Proxy for `author_text` on the underlying element."""
        return self._element.author_text

    @author.setter
    def author(self, value: str):
        self._element.author_text = value

    @property
    def category(self):
        """Proxy for `category_text` on the underlying element."""
        return self._element.category_text

    @category.setter
    def category(self, value: str):
        self._element.category_text = value

    @property
    def comments(self):
        """Proxy for `comments_text` on the underlying element."""
        return self._element.comments_text

    @comments.setter
    def comments(self, value: str):
        self._element.comments_text = value

    @property
    def content_status(self):
        """Proxy for `contentStatus_text` on the underlying element."""
        return self._element.contentStatus_text

    @content_status.setter
    def content_status(self, value: str):
        self._element.contentStatus_text = value

    @property
    def created(self):
        """Proxy for `created_datetime` on the underlying element."""
        return self._element.created_datetime

    @created.setter
    def created(self, value: dt.datetime):
        self._element.created_datetime = value

    @property
    def identifier(self):
        """Proxy for `identifier_text` on the underlying element."""
        return self._element.identifier_text

    @identifier.setter
    def identifier(self, value: str):
        self._element.identifier_text = value

    @property
    def keywords(self):
        """Proxy for `keywords_text` on the underlying element."""
        return self._element.keywords_text

    @keywords.setter
    def keywords(self, value: str):
        self._element.keywords_text = value

    @property
    def language(self):
        """Proxy for `language_text` on the underlying element."""
        return self._element.language_text

    @language.setter
    def language(self, value: str):
        self._element.language_text = value

    @property
    def last_modified_by(self):
        """Proxy for `lastModifiedBy_text` on the underlying element."""
        return self._element.lastModifiedBy_text

    @last_modified_by.setter
    def last_modified_by(self, value: str):
        self._element.lastModifiedBy_text = value

    @property
    def last_printed(self):
        """Proxy for `lastPrinted_datetime` on the underlying element."""
        return self._element.lastPrinted_datetime

    @last_printed.setter
    def last_printed(self, value: dt.datetime):
        self._element.lastPrinted_datetime = value

    @property
    def modified(self):
        """Proxy for `modified_datetime` on the underlying element."""
        return self._element.modified_datetime

    @modified.setter
    def modified(self, value: dt.datetime):
        self._element.modified_datetime = value

    @property
    def revision(self):
        """Proxy for `revision_number` on the underlying element."""
        return self._element.revision_number

    @revision.setter
    def revision(self, value: int):
        self._element.revision_number = value

    @property
    def subject(self):
        """Proxy for `subject_text` on the underlying element."""
        return self._element.subject_text

    @subject.setter
    def subject(self, value: str):
        self._element.subject_text = value

    @property
    def title(self):
        """Proxy for `title_text` on the underlying element."""
        return self._element.title_text

    @title.setter
    def title(self, value: str):
        self._element.title_text = value

    @property
    def version(self):
        """Proxy for `version_text` on the underlying element."""
        return self._element.version_text

    @version.setter
    def version(self, value: str):
        self._element.version_text = value

View File

@@ -0,0 +1,12 @@
"""Exceptions specific to python-opc.
The base exception class is OpcError.
"""
class OpcError(Exception):
    """Root of the python-opc exception hierarchy."""

    pass
class PackageNotFoundError(OpcError):
    """Signals that no package exists at the path that was specified."""

    pass

View File

@@ -0,0 +1,247 @@
# pyright: reportPrivateUsage=false
"""Temporary stand-in for main oxml module.
This module came across with the PackageReader transplant. Probably much will get
replaced with objects from the pptx.oxml.core and then this module will either get
deleted or only hold the package related custom element classes.
"""
from __future__ import annotations
from typing import cast
from lxml import etree
from docx.opc.constants import NAMESPACE as NS
from docx.opc.constants import RELATIONSHIP_TARGET_MODE as RTM
# configure XML parser
# `element_class_lookup` is the namespace-based class lookup that the custom element
# classes are registered on after the class definitions below.
element_class_lookup = etree.ElementNamespaceClassLookup()
# shared parser: blank-text removal enabled, entity resolution disabled
oxml_parser = etree.XMLParser(remove_blank_text=True, resolve_entities=False)
oxml_parser.set_element_class_lookup(element_class_lookup)
# prefix -> namespace URI; consulted by `qn()` and by the `new()` factory methods
nsmap = {
    "ct": NS.OPC_CONTENT_TYPES,
    "pr": NS.OPC_RELATIONSHIPS,
    "r": NS.OFC_RELATIONSHIPS,
}
# ===========================================================================
# functions
# ===========================================================================
def parse_xml(text: str) -> etree._Element:
    """Parse `text` and return its root element, using the module-level `oxml_parser`.

    Stands in for a bare `etree.fromstring()` call so that every parse goes through the
    parser configured at the top of this module.
    """
    root = etree.fromstring(text, oxml_parser)
    return root
def qn(tag: str) -> str:
    """Return the Clark-notation qualified name for namespace-prefixed `tag`.

    "qn" stands for "qualified name". The part before the colon is looked up in `nsmap`
    and the result has the form lxml expects, e.g. ``qn('pr:Relationship')`` returns
    ``'{http://schemas.../relationships}Relationship'``.

    Raises |KeyError| when the prefix is not in `nsmap` and |ValueError| when `tag` does
    not contain exactly one colon.
    """
    prefix, local_name = tag.split(":")
    namespace_uri = nsmap[prefix]
    return f"{{{namespace_uri}}}{local_name}"
def serialize_part_xml(part_elm: etree._Element) -> bytes:
    """Return `part_elm` serialized as bytes in the form stored for an XML part.

    The output is UTF-8 encoded and produced with ``standalone=True``; pretty-printing
    is not requested, so no whitespace is added for readability.
    """
    xml_bytes = etree.tostring(part_elm, encoding="UTF-8", standalone=True)
    return xml_bytes
def serialize_for_reading(element: etree._Element) -> str:
    """Return `element` as a pretty-printed `str`, a form convenient for tests.

    Serialized with ``encoding="unicode"``, so the result is text rather than bytes and
    carries no XML declaration.
    """
    readable_xml = etree.tostring(element, encoding="unicode", pretty_print=True)
    return readable_xml
# ===========================================================================
# Custom element classes
# ===========================================================================
class BaseOxmlElement(etree.ElementBase):
    """Common ancestor of the custom element classes, giving them shared behavior."""

    @property
    def xml(self) -> str:
        """This element as pretty-printed XML text, intended for use in tests.

        The string has no XML declaration at the top.
        """
        return etree.tostring(self, encoding="unicode", pretty_print=True)
class CT_Default(BaseOxmlElement):
    """`<Default>` element found in the `[Content_Types].xml` part.

    It names the default content type applied to any part having the specified extension.
    """

    @staticmethod
    def new(ext: str, content_type: str):
        """Build a ``<Default>`` element whose attributes carry `ext` and `content_type`."""
        ns_uri = nsmap["ct"]
        element = parse_xml(f'<Default xmlns="{ns_uri}"/>')
        element.set("Extension", ext)
        element.set("ContentType", content_type)
        return element

    @property
    def content_type(self):
        """Value of the ``ContentType`` attribute of this ``<Default>`` element."""
        return self.get("ContentType")

    @property
    def extension(self):
        """Value of the ``Extension`` attribute of this ``<Default>`` element."""
        return self.get("Extension")
class CT_Override(BaseOxmlElement):
    """``<Override>`` element, specifying the content type to be applied for a part with
    the specified partname."""

    @property
    def content_type(self):
        """String held in the ``ContentType`` attribute of this ``<Override>``
        element."""
        return self.get("ContentType")

    @staticmethod
    def new(partname: str, content_type: str):
        """Return a new ``<Override>`` element with attributes set to parameter values.

        `partname` is written to the ``PartName`` attribute and `content_type` to the
        ``ContentType`` attribute. The element is created in the content-types
        namespace (``nsmap["ct"]``).
        """
        xml = '<Override xmlns="%s"/>' % nsmap["ct"]
        override = parse_xml(xml)
        override.set("PartName", partname)
        override.set("ContentType", content_type)
        return override

    @property
    def partname(self):
        """String held in the ``PartName`` attribute of this ``<Override>`` element."""
        return self.get("PartName")
class CT_Relationship(BaseOxmlElement):
    """`<Relationship>` element, one relationship from a source to a target part."""

    @staticmethod
    def new(rId: str, reltype: str, target: str, target_mode: str = RTM.INTERNAL):
        """Build a ``<Relationship>`` element from the given attribute values.

        The ``TargetMode`` attribute is written only when `target_mode` is
        ``RTM.EXTERNAL``; otherwise it is omitted from the element.
        """
        ns_uri = nsmap["pr"]
        element = parse_xml(f'<Relationship xmlns="{ns_uri}"/>')
        for attr_name, attr_value in (("Id", rId), ("Type", reltype), ("Target", target)):
            element.set(attr_name, attr_value)
        if target_mode == RTM.EXTERNAL:
            element.set("TargetMode", RTM.EXTERNAL)
        return element

    @property
    def rId(self):
        """Value of the ``Id`` attribute of this ``<Relationship>`` element."""
        return self.get("Id")

    @property
    def reltype(self):
        """Value of the ``Type`` attribute of this ``<Relationship>`` element."""
        return self.get("Type")

    @property
    def target_ref(self):
        """Value of the ``Target`` attribute of this ``<Relationship>`` element."""
        return self.get("Target")

    @property
    def target_mode(self):
        """Value of the ``TargetMode`` attribute, either ``Internal`` or ``External``.

        ``Internal`` is returned when the attribute is absent.
        """
        return self.get("TargetMode", RTM.INTERNAL)
class CT_Relationships(BaseOxmlElement):
    """``<Relationships>`` element, the root element of a .rels file."""

    def add_rel(self, rId: str, reltype: str, target: str, is_external: bool = False):
        """Append a new child ``<Relationship>`` element built from the arguments.

        `is_external` selects the target mode passed on to `CT_Relationship.new()`.
        """
        if is_external:
            mode = RTM.EXTERNAL
        else:
            mode = RTM.INTERNAL
        self.append(CT_Relationship.new(rId, reltype, target, mode))

    @staticmethod
    def new() -> CT_Relationships:
        """Build an empty ``<Relationships>`` element."""
        ns_uri = nsmap["pr"]
        root = parse_xml(f'<Relationships xmlns="{ns_uri}"/>')
        return cast(CT_Relationships, root)

    @property
    def Relationship_lst(self):
        """List of all the ``<Relationship>`` child elements."""
        child_tag = qn("pr:Relationship")
        return self.findall(child_tag)

    @property
    def xml(self):
        """This element serialized for saving in a .rels stream.

        Unlike the base-class property, the result comes from `serialize_part_xml()`:
        not pretty printed and with an XML declaration at the top.
        """
        return serialize_part_xml(self)
class CT_Types(BaseOxmlElement):
    """``<Types>`` element, the container element for Default and Override elements in
    [Content_Types].xml."""

    def add_default(self, ext: str, content_type: str):
        """Add a child ``<Default>`` element with attributes set to parameter values."""
        default = CT_Default.new(ext, content_type)
        self.append(default)

    def add_override(self, partname: str, content_type: str):
        """Add a child ``<Override>`` element with attributes set to parameter
        values."""
        override = CT_Override.new(partname, content_type)
        self.append(override)

    @property
    def defaults(self):
        """Return a list containing all the ``<Default>`` child elements."""
        return self.findall(qn("ct:Default"))

    @staticmethod
    def new() -> CT_Types:
        """Return a new ``<Types>`` element."""
        xml = '<Types xmlns="%s"/>' % nsmap["ct"]
        # `parse_xml()` is annotated as returning a plain element; the cast only informs
        # the type checker, mirroring `CT_Relationships.new()`.
        return cast(CT_Types, parse_xml(xml))

    @property
    def overrides(self):
        """Return a list containing all the ``<Override>`` child elements."""
        return self.findall(qn("ct:Override"))
# Register the custom element classes on the class lookup used by `oxml_parser`, keyed by
# tag name within each namespace.
# -- content-types namespace (prefix "ct") --
ct_namespace = element_class_lookup.get_namespace(nsmap["ct"])
ct_namespace["Default"] = CT_Default
ct_namespace["Override"] = CT_Override
ct_namespace["Types"] = CT_Types
# -- package-relationships namespace (prefix "pr") --
pr_namespace = element_class_lookup.get_namespace(nsmap["pr"])
pr_namespace["Relationship"] = CT_Relationship
pr_namespace["Relationships"] = CT_Relationships

View File

@@ -0,0 +1,219 @@
"""Objects that implement reading and writing OPC packages."""
from __future__ import annotations
from typing import IO, TYPE_CHECKING, Iterator, cast
from docx.opc.constants import RELATIONSHIP_TYPE as RT
from docx.opc.packuri import PACKAGE_URI, PackURI
from docx.opc.part import PartFactory
from docx.opc.parts.coreprops import CorePropertiesPart
from docx.opc.pkgreader import PackageReader
from docx.opc.pkgwriter import PackageWriter
from docx.opc.rel import Relationships
from docx.shared import lazyproperty
if TYPE_CHECKING:
from typing_extensions import Self
from docx.opc.coreprops import CoreProperties
from docx.opc.part import Part
from docx.opc.rel import _Relationship # pyright: ignore[reportPrivateUsage]
class OpcPackage:
    """Main API class for |python-opc|.

    A new instance is constructed by calling the :meth:`open` class method with a path
    to a package file or file-like object containing one.
    """

    def after_unmarshal(self):
        """Entry point for any post-unmarshaling processing.

        May be overridden by subclasses without forwarding call to super.
        """
        # don't place any code here, just catch call if not overridden by
        # subclass
        pass

    @property
    def core_properties(self) -> CoreProperties:
        """|CoreProperties| object providing read/write access to the Dublin Core
        properties for this document."""
        return self._core_properties_part.core_properties

    def iter_rels(self) -> Iterator[_Relationship]:
        """Generate exactly one reference to each relationship in the package by
        performing a depth-first traversal of the rels graph."""

        def walk_rels(source: OpcPackage | Part, visited: list[Part]) -> Iterator[_Relationship]:
            for rel in source.rels.values():
                # every relationship is yielded, including external ones and ones whose
                # target part has already been visited
                yield rel
                if rel.is_external:
                    continue
                part = rel.target_part
                if part in visited:
                    continue
                visited.append(part)
                yield from walk_rels(part, visited)

        # a fresh `visited` list per traversal, shared by all recursion levels
        yield from walk_rels(self, [])

    def iter_parts(self) -> Iterator[Part]:
        """Generate exactly one reference to each of the parts in the package by
        performing a depth-first traversal of the rels graph."""

        def walk_parts(source: OpcPackage | Part, visited: list[Part]) -> Iterator[Part]:
            for rel in source.rels.values():
                if rel.is_external:
                    continue
                part = rel.target_part
                if part in visited:
                    continue
                visited.append(part)
                yield part
                yield from walk_parts(part, visited)

        # `visited` is created here and passed explicitly rather than living in a
        # mutable default argument, matching how `iter_rels()` tracks visited parts
        yield from walk_parts(self, [])

    def load_rel(self, reltype: str, target: Part | str, rId: str, is_external: bool = False):
        """Return newly added |_Relationship| instance of `reltype` between this package
        and `target` with key `rId`.

        Target mode is set to ``RTM.EXTERNAL`` if `is_external` is |True|. Intended for
        use during load from a serialized package, where the rId is well known. Other
        methods exist for adding a new relationship to the package during processing.
        """
        return self.rels.add_relationship(reltype, target, rId, is_external)

    @property
    def main_document_part(self):
        """Return a reference to the main document part for this package.

        Examples include a document part for a WordprocessingML package, a presentation
        part for a PresentationML package, or a workbook part for a SpreadsheetML
        package.
        """
        return self.part_related_by(RT.OFFICE_DOCUMENT)

    def next_partname(self, template: str) -> PackURI:
        """Return a |PackURI| instance representing partname matching `template`.

        The returned part-name has the next available numeric suffix to distinguish it
        from other parts of its type. `template` is a printf (%)-style template string
        containing a single replacement item, a '%d' to be used to insert the integer
        portion of the partname. Example: "/word/header%d.xml"
        """
        partnames = {part.partname for part in self.iter_parts()}
        # With N existing partnames, N + 1 distinct candidates are tried, so at least one
        # of them is unused and the loop returns before it is exhausted.
        for n in range(1, len(partnames) + 2):
            candidate_partname = template % n
            if candidate_partname not in partnames:
                return PackURI(candidate_partname)

    @classmethod
    def open(cls, pkg_file: str | IO[bytes]) -> Self:
        """Return an |OpcPackage| instance loaded with the contents of `pkg_file`."""
        pkg_reader = PackageReader.from_file(pkg_file)
        package = cls()
        Unmarshaller.unmarshal(pkg_reader, package, PartFactory)
        return package

    def part_related_by(self, reltype: str) -> Part:
        """Return part to which this package has a relationship of `reltype`.

        Raises |KeyError| if no such relationship is found and |ValueError| if more than
        one such relationship is found.
        """
        return self.rels.part_with_reltype(reltype)

    @property
    def parts(self) -> list[Part]:
        """Return a list containing a reference to each of the parts in this package."""
        return list(self.iter_parts())

    def relate_to(self, part: Part, reltype: str):
        """Return rId key of new or existing relationship to `part`.

        If a relationship of `reltype` to `part` already exists, its rId is returned. Otherwise a
        new relationship is created and that rId is returned.
        """
        rel = self.rels.get_or_add(reltype, part)
        return rel.rId

    @lazyproperty
    def rels(self):
        """Return a reference to the |Relationships| instance holding the collection of
        relationships for this package."""
        return Relationships(PACKAGE_URI.baseURI)

    def save(self, pkg_file: str | IO[bytes]):
        """Save this package to `pkg_file`.

        `pkg_file` can be either a file-path or a file-like object.
        """
        for part in self.parts:
            part.before_marshal()
        # `self.parts` is deliberately evaluated again here rather than reused from the
        # loop above, so the part graph is re-walked after the `before_marshal()` calls.
        PackageWriter.write(pkg_file, self.rels, self.parts)

    @property
    def _core_properties_part(self) -> CorePropertiesPart:
        """|CorePropertiesPart| object related to this package.

        Creates a default core properties part if one is not present (not common).
        """
        try:
            return cast(CorePropertiesPart, self.part_related_by(RT.CORE_PROPERTIES))
        except KeyError:
            # no core-properties relationship yet: create the default part and relate it
            # to the package so later lookups find it
            core_properties_part = CorePropertiesPart.default(self)
            self.relate_to(core_properties_part, RT.CORE_PROPERTIES)
            return core_properties_part
class Unmarshaller:
    """Namespace for the static methods that rebuild a package from a |PackageReader|."""

    @staticmethod
    def unmarshal(pkg_reader, package, part_factory):
        """Build the graph of parts and relationships described by `pkg_reader`.

        Each part is constructed by calling `part_factory`. Relationships are then loaded
        onto their source, which is `package` for package-level relationships. Finally
        `after_unmarshal()` is called on every part and then on `package`.
        """
        parts_by_partname = Unmarshaller._unmarshal_parts(pkg_reader, package, part_factory)
        Unmarshaller._unmarshal_relationships(pkg_reader, package, parts_by_partname)
        for loaded_part in parts_by_partname.values():
            loaded_part.after_unmarshal()
        package.after_unmarshal()

    @staticmethod
    def _unmarshal_parts(pkg_reader, package, part_factory):
        """Return a dict of the parts in `pkg_reader`, keyed by partname.

        Each value is whatever `part_factory` returns when called with
        `(partname, content_type, reltype, blob, package)`.
        """
        return {
            partname: part_factory(partname, content_type, reltype, blob, package)
            for partname, content_type, reltype, blob in pkg_reader.iter_sparts()
        }

    @staticmethod
    def _unmarshal_relationships(pkg_reader, package, parts):
        """Load each relationship in `pkg_reader` onto its source object.

        The source is `package` when the source URI is "/", otherwise the part in `parts`
        with that partname. The target is the raw target reference for an external
        relationship, otherwise the target part looked up in `parts`.
        """
        for source_uri, srel in pkg_reader.iter_srels():
            if source_uri == "/":
                source = package
            else:
                source = parts[source_uri]
            if srel.is_external:
                target = srel.target_ref
            else:
                target = parts[srel.target_partname]
            source.load_rel(srel.reltype, target, srel.rId, srel.is_external)

View File

@@ -0,0 +1,109 @@
"""Provides the PackURI value type.
Also some useful known pack URI strings such as PACKAGE_URI.
"""
from __future__ import annotations
import posixpath
import re
class PackURI(str):
    """A |str| subclass giving access to the components of a pack URI.

    Adds properties such as `baseURI`, `filename` and `ext` to an absolute pack URI like
    ``'/word/document.xml'``. Behaves as |str| otherwise.
    """

    # -- alphabetic stem followed by an optional index with no leading zero, e.g. "slide21" --
    _filename_re = re.compile("([a-zA-Z]+)([1-9][0-9]*)?")

    def __new__(cls, pack_uri_str: str):
        """Return a new |PackURI| for `pack_uri_str`.

        Raises |ValueError| when `pack_uri_str` does not begin with a slash, which
        includes the empty string.
        """
        # -- slice rather than index so an empty string is rejected with the same
        # -- |ValueError| as any other non-absolute value instead of an |IndexError| --
        if pack_uri_str[:1] != "/":
            tmpl = "PackURI must begin with slash, got '%s'"
            raise ValueError(tmpl % pack_uri_str)
        return str.__new__(cls, pack_uri_str)

    @staticmethod
    def from_rel_ref(baseURI: str, relative_ref: str) -> PackURI:
        """The absolute |PackURI| formed by translating `relative_ref` onto `baseURI`."""
        joined_uri = posixpath.join(baseURI, relative_ref)
        # -- abspath() collapses any ".." segments in the joined path --
        return PackURI(posixpath.abspath(joined_uri))

    @property
    def baseURI(self) -> str:
        """The directory portion of this pack URI, roughly speaking.

        E.g. ``'/ppt/slides'`` for ``'/ppt/slides/slide1.xml'``. For the package pseudo-
        partname '/', baseURI is '/'.
        """
        return posixpath.split(self)[0]

    @property
    def ext(self) -> str:
        """The extension of this pack URI, e.g. ``'xml'`` for ``'/word/document.xml'``.

        The leading period is not included.
        """
        # -- raw_ext is either the empty string or starts with a period, e.g. '.xml' --
        raw_ext = posixpath.splitext(self)[1]
        return raw_ext[1:] if raw_ext.startswith(".") else raw_ext

    @property
    def filename(self):
        """The "filename" portion of this pack URI.

        E.g. ``'slide1.xml'`` for ``'/ppt/slides/slide1.xml'``. For the package pseudo-
        partname '/', filename is ''.
        """
        return posixpath.split(self)[1]

    @property
    def idx(self):
        """The integer index in the partname, or |None| for a singleton partname.

        E.g. ``21`` for ``'/ppt/slides/slide21.xml'`` and |None| for
        ``'/ppt/presentation.xml'``.
        """
        filename = self.filename
        if not filename:
            return None
        name_part = posixpath.splitext(filename)[0]  # filename w/ext removed
        match = self._filename_re.match(name_part)
        if match is None or not match.group(2):
            return None
        return int(match.group(2))

    @property
    def membername(self):
        """This pack URI with the leading slash removed.

        This is the form used as the Zip file membername for the package item. It is ''
        for the package pseudo-partname '/'.
        """
        return self[1:]

    def relative_ref(self, baseURI: str):
        """Return the relative reference to this package item from `baseURI`.

        E.g. PackURI('/ppt/slideLayouts/slideLayout1.xml') would return
        '../slideLayouts/slideLayout1.xml' for baseURI '/ppt/slides'.
        """
        # -- the root case is handled separately; the original code did so as a
        # -- workaround for a posixpath.relpath() bug in Python 2.6 when `start` is '/' --
        return self[1:] if baseURI == "/" else posixpath.relpath(self, baseURI)

    @property
    def rels_uri(self):
        """The pack URI of the .rels part corresponding to this pack URI.

        Only produces sensible output if this pack URI is a partname or the package
        pseudo-partname '/'.
        """
        rels_filename = "%s.rels" % self.filename
        rels_uri_str = posixpath.join(self.baseURI, "_rels", rels_filename)
        return PackURI(rels_uri_str)
# -- pack URI of the package itself, the pseudo-partname "/" --
PACKAGE_URI = PackURI("/")
# -- pack URI of the "[Content_Types].xml" item at the package root --
CONTENT_TYPES_URI = PackURI("/[Content_Types].xml")

View File

@@ -0,0 +1,247 @@
# pyright: reportImportCycles=false
"""Open Packaging Convention (OPC) objects related to package parts."""
from __future__ import annotations
from typing import TYPE_CHECKING, Callable, Type, cast
from docx.opc.oxml import serialize_part_xml
from docx.opc.packuri import PackURI
from docx.opc.rel import Relationships
from docx.opc.shared import cls_method_fn
from docx.oxml.parser import parse_xml
from docx.shared import lazyproperty
if TYPE_CHECKING:
from docx.oxml.xmlchemy import BaseOxmlElement
from docx.package import Package
class Part:
    """Base class for package parts.

    Supplies the properties and methods common to all parts: partname, content type,
    blob, and relationship handling. Intended to be subclassed in client code to
    implement specific part behaviors.
    """

    def __init__(
        self,
        partname: PackURI,
        content_type: str,
        blob: bytes | None = None,
        package: Package | None = None,
    ):
        super().__init__()
        self._package = package
        self._blob = blob
        self._content_type = content_type
        self._partname = partname

    def after_unmarshal(self):
        """Hook for post-unmarshaling processing, for example parsing the part XML.

        Does nothing here. Subclasses may override it without calling super.
        """
        # -- intentionally empty, this only catches the call when not overridden --
        pass

    def before_marshal(self):
        """Hook for pre-serialization processing, for example finalizing part naming.

        Does nothing here. Subclasses may override it without calling super.
        """
        # -- intentionally empty, this only catches the call when not overridden --
        pass

    @property
    def blob(self) -> bytes:
        """Contents of this package part as bytes.

        Intended to be overridden by subclasses. The default is the blob provided on
        construction, or empty bytes when that blob is |None| or empty.
        """
        loaded_blob = self._blob
        return loaded_blob if loaded_blob else b""

    @property
    def content_type(self):
        """Content type of this part."""
        return self._content_type

    def drop_rel(self, rId: str):
        """Remove the relationship identified by `rId` unless it is referenced twice or more.

        A relationship with a reference count of 0 is an implicit relationship.
        """
        if self._rel_ref_count(rId) >= 2:
            return
        del self.rels[rId]

    @classmethod
    def load(cls, partname: PackURI, content_type: str, blob: bytes, package: Package):
        """Return an instance of this class constructed from the given values."""
        return cls(partname, content_type, blob, package)

    def load_rel(self, reltype: str, target: Part | str, rId: str, is_external: bool = False):
        """Add a relationship of `reltype` to `target` under key `rId` and return it.

        The relationship is external when `is_external` is |True|. Intended for use while
        loading a serialized package, where the rId is already known. Use `relate_to()`
        to add a relationship when manipulating a part.
        """
        return self.rels.add_relationship(reltype, target, rId, is_external)

    @property
    def package(self):
        """The package instance this part was constructed with."""
        return self._package

    @property
    def partname(self):
        """|PackURI| holding the partname of this part, e.g. '/ppt/slides/slide1.xml'."""
        return self._partname

    @partname.setter
    def partname(self, partname: str):
        if isinstance(partname, PackURI):
            self._partname = partname
            return
        tmpl = "partname must be instance of PackURI, got '%s'"
        raise TypeError(tmpl % type(partname).__name__)

    def part_related_by(self, reltype: str) -> Part:
        """Return the part this part is related to by a relationship of `reltype`.

        Raises |KeyError| if no such relationship is found and |ValueError| if more than
        one is found. Allows an implicitly related part to be resolved, such as
        Slide -> SlideLayout.
        """
        return self.rels.part_with_reltype(reltype)

    def relate_to(self, target: Part | str, reltype: str, is_external: bool = False) -> str:
        """Return the rId of the relationship of `reltype` to `target`.

        An existing matching relationship is reused; otherwise a new one is created.
        `target` is treated as a target-reference string when `is_external` is |True|
        and as a part otherwise.
        """
        if not is_external:
            return self.rels.get_or_add(reltype, cast(Part, target)).rId
        return self.rels.get_or_add_ext_rel(reltype, cast(str, target))

    @property
    def related_parts(self):
        """Dict of related parts keyed by rId.

        Lets child objects resolve explicit relationships present in the part XML, e.g.
        sldIdLst to a specific |Slide| instance.
        """
        return self.rels.related_parts

    @lazyproperty
    def rels(self):
        """|Relationships| instance holding the relationships for this part."""
        # -- prevent breakage in `python-docx-template` by retaining legacy `._rels` attribute --
        relationships = Relationships(self._partname.baseURI)
        self._rels = relationships
        return relationships

    def target_ref(self, rId: str) -> str:
        """Return the target reference of the relationship identified by `rId`."""
        return self.rels[rId].target_ref

    def _rel_ref_count(self, rId: str) -> int:
        """Return the number of references in this part to the relationship `rId`.

        Always 0 for `Part`; only an XML part can contain references.
        """
        return 0
class PartFactory:
    """Constructs the |Part| subclass appropriate to a part being unmarshalled.

    Setting ``PartFactory.part_class_selector`` to a callable causes that callable to be
    called with ``content_type, reltype`` once for each part constructed. If it returns
    a class, that class is used for the part. If it returns |None|, or no selector is
    set, the class is looked up by content type in ``PartFactory.part_type_for``. When
    that has no entry either, ``PartFactory.default_part_type`` is used, which is |Part|
    by default.
    """

    # -- initialized to None so a factory with no selector installed falls through to the
    # -- content-type lookup rather than raising AttributeError on the attribute access --
    part_class_selector: Callable[[str, str], Type[Part] | None] | None = None
    part_type_for: dict[str, Type[Part]] = {}
    default_part_type = Part

    def __new__(
        cls,
        partname: PackURI,
        content_type: str,
        reltype: str,
        blob: bytes,
        package: Package,
    ):
        """Return a part loaded from the given values.

        Note the return value is the result of `PartClass.load()`, not an instance of
        |PartFactory|.
        """
        PartClass: Type[Part] | None = None
        if cls.part_class_selector is not None:
            part_class_selector = cls_method_fn(cls, "part_class_selector")
            PartClass = part_class_selector(content_type, reltype)
        # -- fall back to the content-type map when the selector declined or is absent --
        if PartClass is None:
            PartClass = cls._part_cls_for(content_type)
        return PartClass.load(partname, content_type, blob, package)

    @classmethod
    def _part_cls_for(cls, content_type: str):
        """Return the part class registered for `content_type`, or the default part class
        if none is registered."""
        return cls.part_type_for.get(content_type, cls.default_part_type)
class XmlPart(Part):
    """Base class for package parts with an XML payload, which is most of them.

    Extends |Part| by holding the root XML element of the part, parsing it on load and
    serializing it again when the blob is requested.
    """

    def __init__(
        self, partname: PackURI, content_type: str, element: BaseOxmlElement, package: Package
    ):
        super().__init__(partname, content_type, package=package)
        self._element = element

    @property
    def blob(self):
        """The root element of this part serialized by `serialize_part_xml()`."""
        return serialize_part_xml(self._element)

    @property
    def element(self):
        """The root XML element of this XML part."""
        return self._element

    @classmethod
    def load(cls, partname: PackURI, content_type: str, blob: bytes, package: Package):
        """Return an instance of this class whose element is parsed from `blob`."""
        root_element = parse_xml(blob)
        return cls(partname, content_type, root_element, package)

    @property
    def part(self):
        """This part itself.

        Part of the parent protocol: "children" of the document do not know the part
        that contains them, so they ask their parent object. That chain of delegation
        ends here.
        """
        return self

    def _rel_ref_count(self, rId: str) -> int:
        """Return the number of ``r:id`` attributes in this part's XML equal to `rId`."""
        referenced_rIds = cast("list[str]", self._element.xpath("//@r:id"))
        return sum(1 for referenced_rId in referenced_rIds if referenced_rId == rId)

View File

@@ -0,0 +1,48 @@
"""Core properties part, corresponds to ``/docProps/core.xml`` part in package."""
from __future__ import annotations
import datetime as dt
from typing import TYPE_CHECKING
from docx.opc.constants import CONTENT_TYPE as CT
from docx.opc.coreprops import CoreProperties
from docx.opc.packuri import PackURI
from docx.opc.part import XmlPart
from docx.oxml.coreprops import CT_CoreProperties
if TYPE_CHECKING:
from docx.opc.package import OpcPackage
class CorePropertiesPart(XmlPart):
    """Corresponds to the part named ``/docProps/core.xml``.

    "Core" is short for "Dublin Core". The part contains document metadata relatively
    common across documents of all types, not just DOCX.
    """

    @classmethod
    def default(cls, package: OpcPackage):
        """Return a new |CorePropertiesPart| with default values for its base properties.

        Title, last-modified-by and revision are set to fixed defaults, and the modified
        time is set to the current UTC time.
        """
        new_part = cls._new(package)
        props = new_part.core_properties
        props.title = "Word Document"
        props.last_modified_by = "python-docx"
        props.revision = 1
        props.modified = dt.datetime.now(dt.timezone.utc)
        return new_part

    @property
    def core_properties(self):
        """A |CoreProperties| object wrapping the root element of this part."""
        return CoreProperties(self.element)

    @classmethod
    def _new(cls, package: OpcPackage) -> CorePropertiesPart:
        """Return a |CorePropertiesPart| at ``/docProps/core.xml`` wrapping a new
        core-properties element."""
        return CorePropertiesPart(
            PackURI("/docProps/core.xml"),
            CT.OPC_CORE_PROPERTIES,
            CT_CoreProperties.new(),
            package,
        )

View File

@@ -0,0 +1,119 @@
"""Provides a general interface to a `physical` OPC package, such as a zip file."""
import os
from zipfile import ZIP_DEFLATED, ZipFile, is_zipfile
from docx.opc.exceptions import PackageNotFoundError
from docx.opc.packuri import CONTENT_TYPES_URI
class PhysPkgReader:
    """Factory for physical package reader objects."""

    def __new__(cls, pkg_file):
        """Return an instance of the reader class suited to `pkg_file`.

        A string is treated as a path: a directory gets the directory reader and a zip
        file gets the zip reader. Any other path raises |PackageNotFoundError|. A
        non-string is assumed to be a stream and gets the zip reader.
        """
        if not isinstance(pkg_file, str):
            # -- assume it's a stream and let the zip reader sort it out --
            reader_cls = _ZipPkgReader
        elif os.path.isdir(pkg_file):
            reader_cls = _DirPkgReader
        elif is_zipfile(pkg_file):
            reader_cls = _ZipPkgReader
        else:
            raise PackageNotFoundError("Package not found at '%s'" % pkg_file)
        return super().__new__(reader_cls)
class PhysPkgWriter:
    """Factory for physical package writer objects."""

    def __new__(cls, pkg_file):
        """Return a new zip package writer instance, whatever `pkg_file` is."""
        writer_cls = _ZipPkgWriter
        return super().__new__(writer_cls)
class _DirPkgReader(PhysPkgReader):
    """Implements the |PhysPkgReader| interface for an OPC package that has been
    extracted into a directory."""

    def __init__(self, path):
        """`path` is the path to a directory containing an expanded package."""
        super().__init__()
        self._path = os.path.abspath(path)

    def blob_for(self, pack_uri):
        """Return the bytes of the file for `pack_uri` in the package directory."""
        member_path = os.path.join(self._path, pack_uri.membername)
        with open(member_path, "rb") as member_file:
            return member_file.read()

    def close(self):
        """Do nothing.

        Present for interface consistency with the zip reader; a directory doesn't need
        closing.
        """
        pass

    @property
    def content_types_xml(self):
        """The `[Content_Types].xml` blob from the package."""
        return self.blob_for(CONTENT_TYPES_URI)

    def rels_xml_for(self, source_uri):
        """Return the rels item XML for the source with `source_uri`, or |None| if
        reading that rels item raises |IOError|."""
        try:
            return self.blob_for(source_uri.rels_uri)
        except IOError:
            return None
class _ZipPkgReader(PhysPkgReader):
    """Implements the |PhysPkgReader| interface for a zip-file OPC package."""

    def __init__(self, pkg_file):
        """Open `pkg_file` as a zip archive for reading."""
        super().__init__()
        self._zipf = ZipFile(pkg_file, "r")

    def blob_for(self, pack_uri):
        """Return the bytes of the zip member corresponding to `pack_uri`.

        Raises |KeyError| if no matching member is present in the zip archive.
        """
        membername = pack_uri.membername
        return self._zipf.read(membername)

    def close(self):
        """Close the zip archive, releasing any resources it is using."""
        self._zipf.close()

    @property
    def content_types_xml(self):
        """The `[Content_Types].xml` blob from the zip package."""
        return self.blob_for(CONTENT_TYPES_URI)

    def rels_xml_for(self, source_uri):
        """Return the rels item XML for the source with `source_uri`, or |None| if the
        archive has no such rels item."""
        try:
            return self.blob_for(source_uri.rels_uri)
        except KeyError:
            return None
class _ZipPkgWriter(PhysPkgWriter):
    """Implements the |PhysPkgWriter| interface for a zip-file OPC package."""

    def __init__(self, pkg_file):
        """Open `pkg_file` as a deflate-compressed zip archive for writing."""
        super().__init__()
        self._zipf = ZipFile(pkg_file, "w", compression=ZIP_DEFLATED)

    def close(self):
        """Close the zip archive, flushing any pending physical writes and releasing any
        resources it's using."""
        self._zipf.close()

    def write(self, pack_uri, blob):
        """Write `blob` to the archive under the membername for `pack_uri`."""
        membername = pack_uri.membername
        self._zipf.writestr(membername, blob)

View File

@@ -0,0 +1,254 @@
"""Low-level, read-only API to a serialized Open Packaging Convention (OPC) package."""
from docx.opc.constants import RELATIONSHIP_TARGET_MODE as RTM
from docx.opc.oxml import parse_xml
from docx.opc.packuri import PACKAGE_URI, PackURI
from docx.opc.phys_pkg import PhysPkgReader
from docx.opc.shared import CaseInsensitiveDict
class PackageReader:
    """Provides access to the contents of a serialized OPC package.

    The serialized parts and relationships are available through `iter_sparts()` and
    `iter_srels()`.
    """

    def __init__(self, content_types, pkg_srels, sparts):
        # -- `content_types` is accepted but not retained; only the package-level
        # -- relationships and the serialized parts are kept --
        super(PackageReader, self).__init__()
        self._pkg_srels = pkg_srels
        self._sparts = sparts

    @staticmethod
    def from_file(pkg_file):
        """Return a |PackageReader| instance loaded with the contents of `pkg_file`.

        The physical reader is closed before returning, including when loading raises.
        """
        phys_reader = PhysPkgReader(pkg_file)
        # -- close in `finally` so a malformed package doesn't leak the open reader --
        try:
            content_types = _ContentTypeMap.from_xml(phys_reader.content_types_xml)
            pkg_srels = PackageReader._srels_for(phys_reader, PACKAGE_URI)
            sparts = PackageReader._load_serialized_parts(phys_reader, pkg_srels, content_types)
        finally:
            phys_reader.close()
        return PackageReader(content_types, pkg_srels, sparts)

    def iter_sparts(self):
        """Generate a 4-tuple `(partname, content_type, reltype, blob)` for each of the
        serialized parts in the package."""
        for s in self._sparts:
            yield (s.partname, s.content_type, s.reltype, s.blob)

    def iter_srels(self):
        """Generate a 2-tuple `(source_uri, srel)` for each relationship in the package.

        Package-level relationships come first, with `PACKAGE_URI` as their source,
        followed by the relationships of each serialized part.
        """
        for srel in self._pkg_srels:
            yield (PACKAGE_URI, srel)
        for spart in self._sparts:
            for srel in spart.srels:
                yield (spart.partname, srel)

    @staticmethod
    def _load_serialized_parts(phys_reader, pkg_srels, content_types):
        """Return a tuple of |_SerializedPart| instances for the parts in `phys_reader`.

        The parts are those reachable by walking the relationship graph starting with
        `pkg_srels`. The content type of each is looked up by partname in `content_types`.
        """
        part_walker = PackageReader._walk_phys_parts(phys_reader, pkg_srels)
        return tuple(
            _SerializedPart(partname, content_types[partname], reltype, blob, srels)
            for partname, blob, reltype, srels in part_walker
        )

    @staticmethod
    def _srels_for(phys_reader, source_uri):
        """Return a |_SerializedRelationships| instance populated with the relationships
        of the source identified by `source_uri`."""
        rels_xml = phys_reader.rels_xml_for(source_uri)
        return _SerializedRelationships.load_from_xml(source_uri.baseURI, rels_xml)

    @staticmethod
    def _walk_phys_parts(phys_reader, srels, visited_partnames=None):
        """Generate a 4-tuple `(partname, blob, reltype, srels)` for each part reachable
        from `srels`.

        External relationships are skipped. `visited_partnames` is a list shared across
        the recursion so each part is generated only once, even when it is the target of
        several relationships.
        """
        if visited_partnames is None:
            visited_partnames = []
        for srel in srels:
            if srel.is_external:
                continue
            partname = srel.target_partname
            if partname in visited_partnames:
                continue
            visited_partnames.append(partname)
            reltype = srel.reltype
            part_srels = PackageReader._srels_for(phys_reader, partname)
            blob = phys_reader.blob_for(partname)
            yield (partname, blob, reltype, part_srels)
            # -- depth-first: descend into this part's own relationships next --
            yield from PackageReader._walk_phys_parts(phys_reader, part_srels, visited_partnames)
class _ContentTypeMap:
    """Value type giving dictionary-style lookup of content type by partname, e.g.
    ``content_type = cti['/ppt/presentation.xml']``."""

    def __init__(self):
        super().__init__()
        self._defaults = CaseInsensitiveDict()
        self._overrides = CaseInsensitiveDict()

    def __getitem__(self, partname):
        """Return the content type for the part identified by `partname`.

        An override for the partname takes precedence over a default for its extension.
        Raises |KeyError| when `partname` is not a |PackURI| or has no content type.
        """
        if not isinstance(partname, PackURI):
            raise KeyError(
                "_ContentTypeMap key must be <type 'PackURI'>, got %s" % type(partname)
            )
        overrides, defaults = self._overrides, self._defaults
        if partname in overrides:
            return overrides[partname]
        ext = partname.ext
        if ext in defaults:
            return defaults[ext]
        raise KeyError("no content type for partname '%s' in [Content_Types].xml" % partname)

    @staticmethod
    def from_xml(content_types_xml):
        """Return a new |_ContentTypeMap| populated from `content_types_xml`."""
        types_elm = parse_xml(content_types_xml)
        ct_map = _ContentTypeMap()
        for override in types_elm.overrides:
            ct_map._add_override(override.partname, override.content_type)
        for default in types_elm.defaults:
            ct_map._add_default(default.extension, default.content_type)
        return ct_map

    def _add_default(self, extension, content_type):
        """Add the default mapping of `extension` to `content_type`."""
        self._defaults[extension] = content_type

    def _add_override(self, partname, content_type):
        """Add the override mapping of `partname` to `content_type`."""
        self._overrides[partname] = content_type
class _SerializedPart:
"""Value object for an OPC package part.
Provides access to the partname, content type, blob, and serialized relationships
for the part.
"""
def __init__(self, partname, content_type, reltype, blob, srels):
super(_SerializedPart, self).__init__()
self._partname = partname
self._content_type = content_type
self._reltype = reltype
self._blob = blob
self._srels = srels
@property
def partname(self):
return self._partname
@property
def content_type(self):
return self._content_type
@property
def blob(self):
return self._blob
@property
def reltype(self):
"""The referring relationship type of this part."""
return self._reltype
@property
def srels(self):
return self._srels
class _SerializedRelationship:
"""Value object representing a serialized relationship in an OPC package.
Serialized, in this case, means any target part is referred to via its partname
rather than a direct link to an in-memory |Part| object.
"""
def __init__(self, baseURI, rel_elm):
super(_SerializedRelationship, self).__init__()
self._baseURI = baseURI
self._rId = rel_elm.rId
self._reltype = rel_elm.reltype
self._target_mode = rel_elm.target_mode
self._target_ref = rel_elm.target_ref
@property
def is_external(self):
"""True if target_mode is ``RTM.EXTERNAL``"""
return self._target_mode == RTM.EXTERNAL
@property
def reltype(self):
"""Relationship type, like ``RT.OFFICE_DOCUMENT``"""
return self._reltype
@property
def rId(self):
"""Relationship id, like 'rId9', corresponds to the ``Id`` attribute on the
``CT_Relationship`` element."""
return self._rId
@property
def target_mode(self):
"""String in ``TargetMode`` attribute of ``CT_Relationship`` element, one of
``RTM.INTERNAL`` or ``RTM.EXTERNAL``."""
return self._target_mode
@property
def target_ref(self):
"""String in ``Target`` attribute of ``CT_Relationship`` element, a relative
part reference for internal target mode or an arbitrary URI, e.g. an HTTP URL,
for external target mode."""
return self._target_ref
@property
def target_partname(self):
"""|PackURI| instance containing partname targeted by this relationship.
Raises ``ValueError`` on reference if target_mode is ``'External'``. Use
:attr:`target_mode` to check before referencing.
"""
if self.is_external:
msg = (
"target_partname attribute on Relationship is undefined w"
'here TargetMode == "External"'
)
raise ValueError(msg)
# lazy-load _target_partname attribute
if not hasattr(self, "_target_partname"):
self._target_partname = PackURI.from_rel_ref(self._baseURI, self.target_ref)
return self._target_partname
class _SerializedRelationships:
"""Read-only sequence of |_SerializedRelationship| instances corresponding to the
relationships item XML passed to constructor."""
def __init__(self):
super(_SerializedRelationships, self).__init__()
self._srels = []
def __iter__(self):
"""Support iteration, e.g. 'for x in srels:'."""
return self._srels.__iter__()
@staticmethod
def load_from_xml(baseURI, rels_item_xml):
"""Return |_SerializedRelationships| instance loaded with the relationships
contained in `rels_item_xml`.
Returns an empty collection if `rels_item_xml` is |None|.
"""
srels = _SerializedRelationships()
if rels_item_xml is not None:
rels_elm = parse_xml(rels_item_xml)
for rel_elm in rels_elm.Relationship_lst:
srels._srels.append(_SerializedRelationship(baseURI, rel_elm))
return srels

View File

@@ -0,0 +1,115 @@
"""Provides low-level, write-only API to serialized (OPC) package.
OPC stands for Open Packaging Convention. This is, essentially, an implementation of
OpcPackage.save().
"""
from __future__ import annotations
from typing import TYPE_CHECKING, Iterable
from docx.opc.constants import CONTENT_TYPE as CT
from docx.opc.oxml import CT_Types, serialize_part_xml
from docx.opc.packuri import CONTENT_TYPES_URI, PACKAGE_URI
from docx.opc.phys_pkg import PhysPkgWriter
from docx.opc.shared import CaseInsensitiveDict
from docx.opc.spec import default_content_types
if TYPE_CHECKING:
from docx.opc.part import Part
class PackageWriter:
    """Writes a zip-format OPC package to `pkg_file`.

    `pkg_file` can be either a path to a zip file (a string) or a file-like object. The
    single API method, :meth:`write`, is static, so this class is not intended to be
    instantiated.
    """

    @staticmethod
    def write(pkg_file, pkg_rels, parts):
        """Write a physical package to `pkg_file` containing `pkg_rels` and `parts`.

        A content types stream based on the content types of the parts is written as
        well. The physical writer is closed before returning, including when one of the
        write steps raises.
        """
        phys_writer = PhysPkgWriter(pkg_file)
        # -- close in `finally` so a failure part-way through doesn't leave the
        # -- underlying archive open --
        try:
            PackageWriter._write_content_types_stream(phys_writer, parts)
            PackageWriter._write_pkg_rels(phys_writer, pkg_rels)
            PackageWriter._write_parts(phys_writer, parts)
        finally:
            phys_writer.close()

    @staticmethod
    def _write_content_types_stream(phys_writer, parts):
        """Write the ``[Content_Types].xml`` item to the physical package, with a
        content type entry for each part in `parts`."""
        cti = _ContentTypesItem.from_parts(parts)
        phys_writer.write(CONTENT_TYPES_URI, cti.blob)

    @staticmethod
    def _write_parts(phys_writer: PhysPkgWriter, parts: Iterable[Part]):
        """Write the blob of each part in `parts` to the package, along with a rels item
        for its relationships if and only if it has any."""
        for part in parts:
            phys_writer.write(part.partname, part.blob)
            if len(part.rels):
                phys_writer.write(part.partname.rels_uri, part.rels.xml)

    @staticmethod
    def _write_pkg_rels(phys_writer, pkg_rels):
        """Write the XML rels item for `pkg_rels` to the package at the rels URI of the
        package pseudo-partname."""
        phys_writer.write(PACKAGE_URI.rels_uri, pkg_rels.xml)
class _ContentTypesItem:
    """Service class that composes a content types item ([Content_Types].xml) from a
    list of parts.

    Typical use is ``_ContentTypesItem.from_parts(parts).blob``.
    """

    def __init__(self):
        self._overrides = {}
        self._defaults = CaseInsensitiveDict()

    @property
    def blob(self):
        """The XML form of this content types item as serialized by
        `serialize_part_xml()`, suitable for storage as ``[Content_Types].xml``."""
        return serialize_part_xml(self._element)

    @classmethod
    def from_parts(cls, parts):
        """Return a content types item mapping each part in `parts` to its content type.

        Defaults for the "rels" and "xml" extensions are always present.
        """
        item = cls()
        item._defaults["rels"] = CT.OPC_RELATIONSHIPS
        item._defaults["xml"] = CT.XML
        for part in parts:
            item._add_content_type(part.partname, part.content_type)
        return item

    def _add_content_type(self, partname, content_type):
        """Record `content_type` for the part with `partname`.

        It is recorded as a default for the extension when the lowercased extension and
        content type pair appears in `default_content_types`, otherwise as an override
        for the partname.
        """
        ext = partname.ext
        if (ext.lower(), content_type) not in default_content_types:
            self._overrides[partname] = content_type
            return
        self._defaults[ext] = content_type

    @property
    def _element(self):
        """A new content-types element populated from this item.

        Although the sequence of elements is not strictly significant, as an aid to
        testing and readability Default elements are sorted by extension and Override
        elements are sorted by partname.
        """
        types_elm = CT_Types.new()
        for ext in sorted(self._defaults.keys()):
            types_elm.add_default(ext, self._defaults[ext])
        for partname in sorted(self._overrides.keys()):
            types_elm.add_override(partname, self._overrides[partname])
        return types_elm

View File

@@ -0,0 +1,153 @@
"""Relationship-related objects."""
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Dict, cast
from docx.opc.oxml import CT_Relationships
if TYPE_CHECKING:
from docx.opc.part import Part
class Relationships(Dict[str, "_Relationship"]):
    """Collection of |_Relationship| instances keyed by rId, having dict semantics."""

    def __init__(self, baseURI: str):
        super(Relationships, self).__init__()
        self._baseURI = baseURI
        # -- target part for each internal relationship, keyed by rId --
        self._target_parts_by_rId: dict[str, Any] = {}

    def __delitem__(self, rId: str) -> None:
        """Remove the relationship keyed by `rId`, raising |KeyError| if not present.

        The matching entry in `related_parts` is removed too, so a dropped relationship
        does not keep resolving to its old target part.
        """
        super(Relationships, self).__delitem__(rId)
        self._target_parts_by_rId.pop(rId, None)

    def add_relationship(
        self, reltype: str, target: Part | str, rId: str, is_external: bool = False
    ) -> "_Relationship":
        """Return a newly added |_Relationship| instance stored under `rId`.

        `target` is recorded in `related_parts` only for an internal relationship.
        """
        rel = _Relationship(rId, reltype, target, self._baseURI, is_external)
        self[rId] = rel
        if is_external:
            # -- an external rel has no target part; clear any stale entry for a
            # -- re-used rId --
            self._target_parts_by_rId.pop(rId, None)
        else:
            self._target_parts_by_rId[rId] = target
        return rel

    def get_or_add(self, reltype: str, target_part: Part) -> _Relationship:
        """Return the relationship of `reltype` to `target_part`, newly added if not
        already present in the collection."""
        rel = self._get_matching(reltype, target_part)
        if rel is None:
            rel = self.add_relationship(reltype, target_part, self._next_rId)
        return rel

    def get_or_add_ext_rel(self, reltype: str, target_ref: str) -> str:
        """Return the rId of the external relationship of `reltype` to `target_ref`,
        newly added if not already present in the collection."""
        rel = self._get_matching(reltype, target_ref, is_external=True)
        if rel is None:
            rel = self.add_relationship(reltype, target_ref, self._next_rId, is_external=True)
        return rel.rId

    def part_with_reltype(self, reltype: str) -> Part:
        """Return the target part of the relationship with matching `reltype`.

        Raises |KeyError| if no matching relationship is found and |ValueError| if more
        than one is found.
        """
        return self._get_rel_of_type(reltype).target_part

    @property
    def related_parts(self):
        """Dict mapping rIds to target parts for all the internal relationships in the
        collection."""
        return self._target_parts_by_rId

    @property
    def xml(self) -> str:
        """This relationship collection serialized into XML suitable for storage as a
        .rels file in an OPC package."""
        rels_elm = CT_Relationships.new()
        for rel in self.values():
            rels_elm.add_rel(rel.rId, rel.reltype, rel.target_ref, rel.is_external)
        return rels_elm.xml

    def _get_matching(
        self, reltype: str, target: Part | str, is_external: bool = False
    ) -> _Relationship | None:
        """Return the relationship matching `reltype`, `target`, and `is_external`, or
        |None| if the collection has no such relationship."""
        for rel in self.values():
            if rel.reltype != reltype or rel.is_external != is_external:
                continue
            # -- compare like with like: a target ref for external, a part for internal --
            rel_target = rel.target_ref if rel.is_external else rel.target_part
            if rel_target == target:
                return rel
        return None

    def _get_rel_of_type(self, reltype: str):
        """Return the single relationship of type `reltype` in the collection.

        Raises |KeyError| if no matching relationship is found. Raises |ValueError| if
        more than one matching relationship is found.
        """
        matching = [rel for rel in self.values() if rel.reltype == reltype]
        if not matching:
            tmpl = "no relationship of type '%s' in collection"
            raise KeyError(tmpl % reltype)
        if len(matching) > 1:
            tmpl = "multiple relationships of type '%s' in collection"
            raise ValueError(tmpl % reltype)
        return matching[0]

    @property
    def _next_rId(self) -> str:  # pyright: ignore[reportReturnType]
        """Next available rId in the collection, starting from 'rId1' and making use of
        any gaps in numbering, e.g. 'rId2' for rIds ['rId1', 'rId3']."""
        # -- len(self) + 1 candidates always include at least one unused rId --
        for n in range(1, len(self) + 2):
            rId_candidate = "rId%d" % n  # like 'rId19'
            if rId_candidate not in self:
                return rId_candidate
class _Relationship:
    """Record of a single relationship: its rId, its type, and what it points at.

    The target is a part for an internal relationship and a reference string for an
    external one; `is_external` tells which.
    """

    def __init__(
        self, rId: str, reltype: str, target: Part | str, baseURI: str, external: bool = False
    ):
        super().__init__()
        self._rId, self._reltype = rId, reltype
        self._target, self._baseURI = target, baseURI
        # Normalize whatever truthy/falsy value was passed to a real bool.
        self._is_external = True if external else False

    @property
    def is_external(self) -> bool:
        """True when the target mode of this relationship is External."""
        return self._is_external

    @property
    def reltype(self) -> str:
        """Relationship type string given at construction."""
        return self._reltype

    @property
    def rId(self) -> str:
        """Relationship id given at construction."""
        return self._rId

    @property
    def target_part(self) -> Part:
        """Target of an internal relationship.

        Raises |ValueError| when the relationship is external.
        """
        if not self._is_external:
            return cast("Part", self._target)
        raise ValueError(
            "target_part property on _Relationship is undefined when target mode is External"
        )

    @property
    def target_ref(self) -> str:
        """Reference string for the target.

        For an external relationship this is the target as given; otherwise it is the
        target's partname made relative to this relationship's base URI.
        """
        if not self._is_external:
            part = cast("Part", self._target)
            return part.partname.relative_ref(self._baseURI)
        return cast(str, self._target)

View File

@@ -0,0 +1,31 @@
"""Objects shared by opc modules."""
from __future__ import annotations
from typing import Any, Dict, TypeVar
_T = TypeVar("_T")
class CaseInsensitiveDict(Dict[str, Any]):
    """Mapping type that behaves like dict except that it matches without respect to the
    case of the key.

    E.g. cid['A'] == cid['a']. Keys are folded to lower case when stored, so iterating
    the mapping yields the lower-cased keys. It assumes str keys; a key without a
    `.lower()` method raises |AttributeError|. Keys supplied to the constructor or to
    `update()` are folded the same way as keys assigned one at a time.
    """

    def __init__(self, *args: Any, **kwargs: Any):
        super().__init__()
        # Route initial items through `__setitem__` so their keys get folded too.
        self.update(*args, **kwargs)

    def __contains__(self, key):
        return super().__contains__(key.lower())

    def __delitem__(self, key):
        super().__delitem__(key.lower())

    def __getitem__(self, key):
        return super().__getitem__(key.lower())

    def __setitem__(self, key, value):
        super().__setitem__(key.lower(), value)

    def get(self, key, default=None):
        """Return the value for `key` ignoring case, or `default` when absent."""
        return super().get(key.lower(), default)

    def pop(self, key, *default):
        """Remove `key` ignoring case and return its value, like `dict.pop()`."""
        return super().pop(key.lower(), *default)

    def setdefault(self, key, default=None):
        """Return the value for `key` ignoring case, storing `default` first if absent."""
        return super().setdefault(key.lower(), default)

    def update(self, *args: Any, **kwargs: Any) -> None:
        """Add items like `dict.update()`, folding the case of each key."""
        for key, value in dict(*args, **kwargs).items():
            self[key] = value
def cls_method_fn(cls: type, method_name: str):
    """Look up the attribute named `method_name` on `cls` and return it.

    Raises |AttributeError| when `cls` has no attribute of that name.
    """
    method = getattr(cls, method_name)
    return method

View File

@@ -0,0 +1,24 @@
"""Provides mappings that embody aspects of the Open XML spec ISO/IEC 29500."""
from docx.opc.constants import CONTENT_TYPE as CT
# Sequence of (extension, content-type) pairs, listed in alphabetical order of
# extension. Extensions are written lower-case and without a leading dot.
# NOTE(review): presumably consulted when choosing a Default content type for a
# part by its filename extension -- confirm against the callers.
default_content_types = (
# "bin" appears three times with different content types, so building a dict from
# these pairs would keep only the last one.
("bin", CT.PML_PRINTER_SETTINGS),
("bin", CT.SML_PRINTER_SETTINGS),
("bin", CT.WML_PRINTER_SETTINGS),
("bmp", CT.BMP),
("emf", CT.X_EMF),
("fntdata", CT.X_FONTDATA),
("gif", CT.GIF),
# Three spellings of the JPEG extension share one content type.
("jpe", CT.JPEG),
("jpeg", CT.JPEG),
("jpg", CT.JPEG),
("png", CT.PNG),
("rels", CT.OPC_RELATIONSHIPS),
# Both TIFF spellings share one content type.
("tif", CT.TIFF),
("tiff", CT.TIFF),
("wdp", CT.MS_PHOTO),
("wmf", CT.X_WMF),
("xlsx", CT.SML_SHEET),
("xml", CT.XML),
)