Initial commit (Clean history)
This commit is contained in:
219
path/to/venv/lib/python3.12/site-packages/docx/opc/package.py
Normal file
219
path/to/venv/lib/python3.12/site-packages/docx/opc/package.py
Normal file
@@ -0,0 +1,219 @@
|
||||
"""Objects that implement reading and writing OPC packages."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import IO, TYPE_CHECKING, Iterator, cast
|
||||
|
||||
from docx.opc.constants import RELATIONSHIP_TYPE as RT
|
||||
from docx.opc.packuri import PACKAGE_URI, PackURI
|
||||
from docx.opc.part import PartFactory
|
||||
from docx.opc.parts.coreprops import CorePropertiesPart
|
||||
from docx.opc.pkgreader import PackageReader
|
||||
from docx.opc.pkgwriter import PackageWriter
|
||||
from docx.opc.rel import Relationships
|
||||
from docx.shared import lazyproperty
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing_extensions import Self
|
||||
|
||||
from docx.opc.coreprops import CoreProperties
|
||||
from docx.opc.part import Part
|
||||
from docx.opc.rel import _Relationship # pyright: ignore[reportPrivateUsage]
|
||||
|
||||
|
||||
class OpcPackage:
|
||||
"""Main API class for |python-opc|.
|
||||
|
||||
A new instance is constructed by calling the :meth:`open` class method with a path
|
||||
to a package file or file-like object containing one.
|
||||
"""
|
||||
|
||||
def after_unmarshal(self):
|
||||
"""Entry point for any post-unmarshaling processing.
|
||||
|
||||
May be overridden by subclasses without forwarding call to super.
|
||||
"""
|
||||
# don't place any code here, just catch call if not overridden by
|
||||
# subclass
|
||||
pass
|
||||
|
||||
@property
|
||||
def core_properties(self) -> CoreProperties:
|
||||
"""|CoreProperties| object providing read/write access to the Dublin Core
|
||||
properties for this document."""
|
||||
return self._core_properties_part.core_properties
|
||||
|
||||
def iter_rels(self) -> Iterator[_Relationship]:
|
||||
"""Generate exactly one reference to each relationship in the package by
|
||||
performing a depth-first traversal of the rels graph."""
|
||||
|
||||
def walk_rels(
|
||||
source: OpcPackage | Part, visited: list[Part] | None = None
|
||||
) -> Iterator[_Relationship]:
|
||||
visited = [] if visited is None else visited
|
||||
for rel in source.rels.values():
|
||||
yield rel
|
||||
if rel.is_external:
|
||||
continue
|
||||
part = rel.target_part
|
||||
if part in visited:
|
||||
continue
|
||||
visited.append(part)
|
||||
new_source = part
|
||||
for rel in walk_rels(new_source, visited):
|
||||
yield rel
|
||||
|
||||
for rel in walk_rels(self):
|
||||
yield rel
|
||||
|
||||
def iter_parts(self) -> Iterator[Part]:
|
||||
"""Generate exactly one reference to each of the parts in the package by
|
||||
performing a depth-first traversal of the rels graph."""
|
||||
|
||||
def walk_parts(source, visited=[]):
|
||||
for rel in source.rels.values():
|
||||
if rel.is_external:
|
||||
continue
|
||||
part = rel.target_part
|
||||
if part in visited:
|
||||
continue
|
||||
visited.append(part)
|
||||
yield part
|
||||
new_source = part
|
||||
for part in walk_parts(new_source, visited):
|
||||
yield part
|
||||
|
||||
for part in walk_parts(self):
|
||||
yield part
|
||||
|
||||
def load_rel(self, reltype: str, target: Part | str, rId: str, is_external: bool = False):
|
||||
"""Return newly added |_Relationship| instance of `reltype` between this part
|
||||
and `target` with key `rId`.
|
||||
|
||||
Target mode is set to ``RTM.EXTERNAL`` if `is_external` is |True|. Intended for
|
||||
use during load from a serialized package, where the rId is well known. Other
|
||||
methods exist for adding a new relationship to the package during processing.
|
||||
"""
|
||||
return self.rels.add_relationship(reltype, target, rId, is_external)
|
||||
|
||||
@property
|
||||
def main_document_part(self):
|
||||
"""Return a reference to the main document part for this package.
|
||||
|
||||
Examples include a document part for a WordprocessingML package, a presentation
|
||||
part for a PresentationML package, or a workbook part for a SpreadsheetML
|
||||
package.
|
||||
"""
|
||||
return self.part_related_by(RT.OFFICE_DOCUMENT)
|
||||
|
||||
def next_partname(self, template: str) -> PackURI:
|
||||
"""Return a |PackURI| instance representing partname matching `template`.
|
||||
|
||||
The returned part-name has the next available numeric suffix to distinguish it
|
||||
from other parts of its type. `template` is a printf (%)-style template string
|
||||
containing a single replacement item, a '%d' to be used to insert the integer
|
||||
portion of the partname. Example: "/word/header%d.xml"
|
||||
"""
|
||||
partnames = {part.partname for part in self.iter_parts()}
|
||||
for n in range(1, len(partnames) + 2):
|
||||
candidate_partname = template % n
|
||||
if candidate_partname not in partnames:
|
||||
return PackURI(candidate_partname)
|
||||
|
||||
@classmethod
|
||||
def open(cls, pkg_file: str | IO[bytes]) -> Self:
|
||||
"""Return an |OpcPackage| instance loaded with the contents of `pkg_file`."""
|
||||
pkg_reader = PackageReader.from_file(pkg_file)
|
||||
package = cls()
|
||||
Unmarshaller.unmarshal(pkg_reader, package, PartFactory)
|
||||
return package
|
||||
|
||||
def part_related_by(self, reltype: str) -> Part:
|
||||
"""Return part to which this package has a relationship of `reltype`.
|
||||
|
||||
Raises |KeyError| if no such relationship is found and |ValueError| if more than
|
||||
one such relationship is found.
|
||||
"""
|
||||
return self.rels.part_with_reltype(reltype)
|
||||
|
||||
@property
|
||||
def parts(self) -> list[Part]:
|
||||
"""Return a list containing a reference to each of the parts in this package."""
|
||||
return list(self.iter_parts())
|
||||
|
||||
def relate_to(self, part: Part, reltype: str):
|
||||
"""Return rId key of new or existing relationship to `part`.
|
||||
|
||||
If a relationship of `reltype` to `part` already exists, its rId is returned. Otherwise a
|
||||
new relationship is created and that rId is returned.
|
||||
"""
|
||||
rel = self.rels.get_or_add(reltype, part)
|
||||
return rel.rId
|
||||
|
||||
@lazyproperty
|
||||
def rels(self):
|
||||
"""Return a reference to the |Relationships| instance holding the collection of
|
||||
relationships for this package."""
|
||||
return Relationships(PACKAGE_URI.baseURI)
|
||||
|
||||
def save(self, pkg_file: str | IO[bytes]):
|
||||
"""Save this package to `pkg_file`.
|
||||
|
||||
`pkg_file` can be either a file-path or a file-like object.
|
||||
"""
|
||||
for part in self.parts:
|
||||
part.before_marshal()
|
||||
PackageWriter.write(pkg_file, self.rels, self.parts)
|
||||
|
||||
@property
|
||||
def _core_properties_part(self) -> CorePropertiesPart:
|
||||
"""|CorePropertiesPart| object related to this package.
|
||||
|
||||
Creates a default core properties part if one is not present (not common).
|
||||
"""
|
||||
try:
|
||||
return cast(CorePropertiesPart, self.part_related_by(RT.CORE_PROPERTIES))
|
||||
except KeyError:
|
||||
core_properties_part = CorePropertiesPart.default(self)
|
||||
self.relate_to(core_properties_part, RT.CORE_PROPERTIES)
|
||||
return core_properties_part
|
||||
|
||||
|
||||
class Unmarshaller:
|
||||
"""Hosts static methods for unmarshalling a package from a |PackageReader|."""
|
||||
|
||||
@staticmethod
|
||||
def unmarshal(pkg_reader, package, part_factory):
|
||||
"""Construct graph of parts and realized relationships based on the contents of
|
||||
`pkg_reader`, delegating construction of each part to `part_factory`.
|
||||
|
||||
Package relationships are added to `pkg`.
|
||||
"""
|
||||
parts = Unmarshaller._unmarshal_parts(pkg_reader, package, part_factory)
|
||||
Unmarshaller._unmarshal_relationships(pkg_reader, package, parts)
|
||||
for part in parts.values():
|
||||
part.after_unmarshal()
|
||||
package.after_unmarshal()
|
||||
|
||||
@staticmethod
|
||||
def _unmarshal_parts(pkg_reader, package, part_factory):
|
||||
"""Return a dictionary of |Part| instances unmarshalled from `pkg_reader`, keyed
|
||||
by partname.
|
||||
|
||||
Side-effect is that each part in `pkg_reader` is constructed using
|
||||
`part_factory`.
|
||||
"""
|
||||
parts = {}
|
||||
for partname, content_type, reltype, blob in pkg_reader.iter_sparts():
|
||||
parts[partname] = part_factory(partname, content_type, reltype, blob, package)
|
||||
return parts
|
||||
|
||||
@staticmethod
|
||||
def _unmarshal_relationships(pkg_reader, package, parts):
|
||||
"""Add a relationship to the source object corresponding to each of the
|
||||
relationships in `pkg_reader` with its target_part set to the actual target part
|
||||
in `parts`."""
|
||||
for source_uri, srel in pkg_reader.iter_srels():
|
||||
source = package if source_uri == "/" else parts[source_uri]
|
||||
target = srel.target_ref if srel.is_external else parts[srel.target_partname]
|
||||
source.load_rel(srel.reltype, target, srel.rId, srel.is_external)
|
||||
Reference in New Issue
Block a user