287 lines
6.9 KiB
Python
287 lines
6.9 KiB
Python
|
# coding: utf-8
|
||
|
|
||
|
from __future__ import division
|
||
|
|
||
|
import io
|
||
|
import sys
|
||
|
import posixpath
|
||
|
import zipfile
|
||
|
import functools
|
||
|
import itertools
|
||
|
from collections import OrderedDict
|
||
|
|
||
|
try:
|
||
|
from contextlib import suppress
|
||
|
except ImportError:
|
||
|
from contextlib2 import suppress
|
||
|
|
||
|
__metaclass__ = type
|
||
|
|
||
|
|
||
|
def _parents(path):
|
||
|
"""
|
||
|
Given a path with elements separated by
|
||
|
posixpath.sep, generate all parents of that path.
|
||
|
|
||
|
>>> list(_parents('b/d'))
|
||
|
['b']
|
||
|
>>> list(_parents('/b/d/'))
|
||
|
['/b']
|
||
|
>>> list(_parents('b/d/f/'))
|
||
|
['b/d', 'b']
|
||
|
>>> list(_parents('b'))
|
||
|
[]
|
||
|
>>> list(_parents(''))
|
||
|
[]
|
||
|
"""
|
||
|
return itertools.islice(_ancestry(path), 1, None)
|
||
|
|
||
|
|
||
|
def _ancestry(path):
|
||
|
"""
|
||
|
Given a path with elements separated by
|
||
|
posixpath.sep, generate all elements of that path
|
||
|
|
||
|
>>> list(_ancestry('b/d'))
|
||
|
['b/d', 'b']
|
||
|
>>> list(_ancestry('/b/d/'))
|
||
|
['/b/d', '/b']
|
||
|
>>> list(_ancestry('b/d/f/'))
|
||
|
['b/d/f', 'b/d', 'b']
|
||
|
>>> list(_ancestry('b'))
|
||
|
['b']
|
||
|
>>> list(_ancestry(''))
|
||
|
[]
|
||
|
"""
|
||
|
path = path.rstrip(posixpath.sep)
|
||
|
while path and path != posixpath.sep:
|
||
|
yield path
|
||
|
path, tail = posixpath.split(path)
|
||
|
|
||
|
|
||
|
class CompleteDirs(zipfile.ZipFile):
|
||
|
"""
|
||
|
A ZipFile subclass that ensures that implied directories
|
||
|
are always included in the namelist.
|
||
|
"""
|
||
|
|
||
|
@staticmethod
|
||
|
def _implied_dirs(names):
|
||
|
parents = itertools.chain.from_iterable(map(_parents, names))
|
||
|
# Cast names to a set for O(1) lookups
|
||
|
existing = set(names)
|
||
|
# Deduplicate entries in original order
|
||
|
implied_dirs = OrderedDict.fromkeys(
|
||
|
p + posixpath.sep for p in parents
|
||
|
if p + posixpath.sep not in existing
|
||
|
)
|
||
|
return implied_dirs
|
||
|
|
||
|
def namelist(self):
|
||
|
names = super(CompleteDirs, self).namelist()
|
||
|
return names + list(self._implied_dirs(names))
|
||
|
|
||
|
def _name_set(self):
|
||
|
return set(self.namelist())
|
||
|
|
||
|
def resolve_dir(self, name):
|
||
|
"""
|
||
|
If the name represents a directory, return that name
|
||
|
as a directory (with the trailing slash).
|
||
|
"""
|
||
|
names = self._name_set()
|
||
|
dirname = name + '/'
|
||
|
dir_match = name not in names and dirname in names
|
||
|
return dirname if dir_match else name
|
||
|
|
||
|
@classmethod
|
||
|
def make(cls, source):
|
||
|
"""
|
||
|
Given a source (filename or zipfile), return an
|
||
|
appropriate CompleteDirs subclass.
|
||
|
"""
|
||
|
if isinstance(source, CompleteDirs):
|
||
|
return source
|
||
|
|
||
|
if not isinstance(source, zipfile.ZipFile):
|
||
|
return cls(_pathlib_compat(source))
|
||
|
|
||
|
# Only allow for FastPath when supplied zipfile is read-only
|
||
|
if 'r' not in source.mode:
|
||
|
cls = CompleteDirs
|
||
|
|
||
|
res = cls.__new__(cls)
|
||
|
vars(res).update(vars(source))
|
||
|
return res
|
||
|
|
||
|
|
||
|
class FastLookup(CompleteDirs):
|
||
|
"""
|
||
|
ZipFile subclass to ensure implicit
|
||
|
dirs exist and are resolved rapidly.
|
||
|
"""
|
||
|
def namelist(self):
|
||
|
with suppress(AttributeError):
|
||
|
return self.__names
|
||
|
self.__names = super(FastLookup, self).namelist()
|
||
|
return self.__names
|
||
|
|
||
|
def _name_set(self):
|
||
|
with suppress(AttributeError):
|
||
|
return self.__lookup
|
||
|
self.__lookup = super(FastLookup, self)._name_set()
|
||
|
return self.__lookup
|
||
|
|
||
|
|
||
|
def _pathlib_compat(path):
|
||
|
"""
|
||
|
For path-like objects, convert to a filename for compatibility
|
||
|
on Python 3.6.1 and earlier.
|
||
|
"""
|
||
|
try:
|
||
|
return path.__fspath__()
|
||
|
except AttributeError:
|
||
|
return str(path)
|
||
|
|
||
|
|
||
|
class Path:
|
||
|
"""
|
||
|
A pathlib-compatible interface for zip files.
|
||
|
|
||
|
Consider a zip file with this structure::
|
||
|
|
||
|
.
|
||
|
├── a.txt
|
||
|
└── b
|
||
|
├── c.txt
|
||
|
└── d
|
||
|
└── e.txt
|
||
|
|
||
|
>>> data = io.BytesIO()
|
||
|
>>> zf = zipfile.ZipFile(data, 'w')
|
||
|
>>> zf.writestr('a.txt', 'content of a')
|
||
|
>>> zf.writestr('b/c.txt', 'content of c')
|
||
|
>>> zf.writestr('b/d/e.txt', 'content of e')
|
||
|
>>> zf.filename = 'abcde.zip'
|
||
|
|
||
|
Path accepts the zipfile object itself or a filename
|
||
|
|
||
|
>>> root = Path(zf)
|
||
|
|
||
|
From there, several path operations are available.
|
||
|
|
||
|
Directory iteration (including the zip file itself):
|
||
|
|
||
|
>>> a, b = root.iterdir()
|
||
|
>>> a
|
||
|
Path('abcde.zip', 'a.txt')
|
||
|
>>> b
|
||
|
Path('abcde.zip', 'b/')
|
||
|
|
||
|
name property:
|
||
|
|
||
|
>>> b.name
|
||
|
'b'
|
||
|
|
||
|
join with divide operator:
|
||
|
|
||
|
>>> c = b / 'c.txt'
|
||
|
>>> c
|
||
|
Path('abcde.zip', 'b/c.txt')
|
||
|
>>> c.name
|
||
|
'c.txt'
|
||
|
|
||
|
Read text:
|
||
|
|
||
|
>>> c.read_text()
|
||
|
'content of c'
|
||
|
|
||
|
existence:
|
||
|
|
||
|
>>> c.exists()
|
||
|
True
|
||
|
>>> (b / 'missing.txt').exists()
|
||
|
False
|
||
|
|
||
|
Coercion to string:
|
||
|
|
||
|
>>> str(c)
|
||
|
'abcde.zip/b/c.txt'
|
||
|
"""
|
||
|
|
||
|
__repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
|
||
|
|
||
|
def __init__(self, root, at=""):
|
||
|
self.root = FastLookup.make(root)
|
||
|
self.at = at
|
||
|
|
||
|
def open(self, mode='r', *args, **kwargs):
|
||
|
"""
|
||
|
Open this entry as text or binary following the semantics
|
||
|
of ``pathlib.Path.open()`` by passing arguments through
|
||
|
to io.TextIOWrapper().
|
||
|
"""
|
||
|
pwd = kwargs.pop('pwd', None)
|
||
|
zip_mode = mode[0]
|
||
|
stream = self.root.open(self.at, zip_mode, pwd=pwd)
|
||
|
if 'b' in mode:
|
||
|
if args or kwargs:
|
||
|
raise ValueError("encoding args invalid for binary operation")
|
||
|
return stream
|
||
|
return io.TextIOWrapper(stream, *args, **kwargs)
|
||
|
|
||
|
@property
|
||
|
def name(self):
|
||
|
return posixpath.basename(self.at.rstrip("/"))
|
||
|
|
||
|
def read_text(self, *args, **kwargs):
|
||
|
with self.open('r', *args, **kwargs) as strm:
|
||
|
return strm.read()
|
||
|
|
||
|
def read_bytes(self):
|
||
|
with self.open('rb') as strm:
|
||
|
return strm.read()
|
||
|
|
||
|
def _is_child(self, path):
|
||
|
return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
|
||
|
|
||
|
def _next(self, at):
|
||
|
return Path(self.root, at)
|
||
|
|
||
|
def is_dir(self):
|
||
|
return not self.at or self.at.endswith("/")
|
||
|
|
||
|
def is_file(self):
|
||
|
return not self.is_dir()
|
||
|
|
||
|
def exists(self):
|
||
|
return self.at in self.root._name_set()
|
||
|
|
||
|
def iterdir(self):
|
||
|
if not self.is_dir():
|
||
|
raise ValueError("Can't listdir a file")
|
||
|
subs = map(self._next, self.root.namelist())
|
||
|
return filter(self._is_child, subs)
|
||
|
|
||
|
def __str__(self):
|
||
|
return posixpath.join(self.root.filename, self.at)
|
||
|
|
||
|
def __repr__(self):
|
||
|
return self.__repr.format(self=self)
|
||
|
|
||
|
def joinpath(self, add):
|
||
|
next = posixpath.join(self.at, _pathlib_compat(add))
|
||
|
return self._next(self.root.resolve_dir(next))
|
||
|
|
||
|
__truediv__ = joinpath
|
||
|
|
||
|
@property
|
||
|
def parent(self):
|
||
|
parent_at = posixpath.dirname(self.at.rstrip('/'))
|
||
|
if parent_at:
|
||
|
parent_at += '/'
|
||
|
return self._next(parent_at)
|
||
|
|
||
|
if sys.version_info < (3,):
|
||
|
__div__ = __truediv__
|