Skip to content

Commit 6f6eb35

Browse files
authored
bpo-32248 - Implement ResourceReader and get_resource_reader() for zipimport (#5248)
1 parent 789e359 commit 6f6eb35

File tree

5 files changed

+157
-83
lines changed

5 files changed

+157
-83
lines changed

Doc/whatsnew/3.7.rst

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -328,8 +328,12 @@ importlib.resources
328328
This module provides several new APIs and one new ABC for access to, opening,
329329
and reading *resources* inside packages. Resources are roughly akin to files
330330
inside of packages, but they needn't be actual files on the physical file
331-
system. Module loaders can provide :class:`importlib.abc.ResourceReader`
332-
implementations to support this new module's API.
331+
system. Module loaders can provide a :meth:`get_resource_reader()` method
332+
which returns a :class:`importlib.abc.ResourceReader` instance to support this
333+
new API. Built-in file path loaders and zip file loaders both support this.
334+
(See the PyPI package
335+
`importlib_resources <https://importlib-resources.readthedocs.io/en/latest/>`_
336+
as a compatible backport for older Python versions.)
333337

334338

335339
Improved Modules

Lib/importlib/resources.py

Lines changed: 83 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from typing import Iterator, Optional, Set, Union # noqa: F401
1313
from typing import cast
1414
from typing.io import BinaryIO, TextIO
15-
from zipfile import ZipFile
15+
from zipimport import ZipImportError
1616

1717

1818
Package = Union[str, ModuleType]
@@ -216,38 +216,7 @@ def is_resource(package: Package, name: str) -> bool:
216216
# contents doesn't necessarily mean it's a resource. Directories are not
217217
# resources, so let's try to find out if it's a directory or not.
218218
path = Path(package.__spec__.origin).parent / name
219-
if path.is_file():
220-
return True
221-
if path.is_dir():
222-
return False
223-
# If it's not a file and it's not a directory, what is it? Well, this
224-
# means the file doesn't exist on the file system, so it probably lives
225-
# inside a zip file. We have to crack open the zip, look at its table of
226-
# contents, and make sure that this entry doesn't have sub-entries.
227-
archive_path = package.__spec__.loader.archive # type: ignore
228-
package_directory = Path(package.__spec__.origin).parent
229-
with ZipFile(archive_path) as zf:
230-
toc = zf.namelist()
231-
relpath = package_directory.relative_to(archive_path)
232-
candidate_path = relpath / name
233-
for entry in toc:
234-
try:
235-
relative_to_candidate = Path(entry).relative_to(candidate_path)
236-
except ValueError:
237-
# The two paths aren't relative to each other so we can ignore it.
238-
continue
239-
# Since directories aren't explicitly listed in the zip file, we must
240-
# infer their 'directory-ness' by looking at the number of path
241-
# components in the path relative to the package resource we're
242-
# looking up. If there are zero additional parts, it's a file, i.e. a
243-
# resource. If there are more than zero it's a directory, i.e. not a
244-
# resource. It has to be one of these two cases.
245-
return len(relative_to_candidate.parts) == 0
246-
# I think it's impossible to get here. It would mean that we are looking
247-
# for a resource in a zip file, there's an entry matching it in the return
248-
# value of contents(), but we never actually found it in the zip's table of
249-
# contents.
250-
raise AssertionError('Impossible situation')
219+
return path.is_file()
251220

252221

253222
def contents(package: Package) -> Iterator[str]:
@@ -268,38 +237,85 @@ def contents(package: Package) -> Iterator[str]:
268237
not package.__spec__.has_location):
269238
return []
270239
package_directory = Path(package.__spec__.origin).parent
271-
try:
272-
yield from os.listdir(str(package_directory))
273-
except (NotADirectoryError, FileNotFoundError):
274-
# The package is probably in a zip file.
275-
archive_path = getattr(package.__spec__.loader, 'archive', None)
276-
if archive_path is None:
277-
raise
278-
relpath = package_directory.relative_to(archive_path)
279-
with ZipFile(archive_path) as zf:
280-
toc = zf.namelist()
281-
subdirs_seen = set() # type: Set
282-
for filename in toc:
283-
path = Path(filename)
284-
# Strip off any path component parts that are in common with the
285-
# package directory, relative to the zip archive's file system
286-
# path. This gives us all the parts that live under the named
287-
# package inside the zip file. If the length of these subparts is
288-
# exactly 1, then it is situated inside the package. The resulting
289-
# length will be 0 if it's above the package, and it will be
290-
# greater than 1 if it lives in a subdirectory of the package
291-
# directory.
292-
#
293-
# However, since directories themselves don't appear in the zip
294-
# archive as a separate entry, we need to return the first path
295-
# component for any case that has > 1 subparts -- but only once!
296-
if path.parts[:len(relpath.parts)] != relpath.parts:
240+
yield from os.listdir(str(package_directory))
241+
242+
243+
# Private implementation of ResourceReader and get_resource_reader() for
244+
# zipimport. Don't use these directly! We're implementing these in Python
245+
# because 1) it's easier, 2) zipimport will likely get rewritten in Python
246+
# itself at some point, so doing this all in C would just be a waste of
247+
# effort.
248+
249+
class _ZipImportResourceReader(resources_abc.ResourceReader):
250+
"""Private class used to support ZipImport.get_resource_reader().
251+
252+
This class is allowed to reference all the innards and private parts of
253+
the zipimporter.
254+
"""
255+
256+
def __init__(self, zipimporter, fullname):
257+
self.zipimporter = zipimporter
258+
self.fullname = fullname
259+
260+
def open_resource(self, resource):
261+
path = f'{self.fullname}/{resource}'
262+
try:
263+
return BytesIO(self.zipimporter.get_data(path))
264+
except OSError:
265+
raise FileNotFoundError
266+
267+
def resource_path(self, resource):
268+
# All resources are in the zip file, so there is no path to the file.
269+
# Raising FileNotFoundError tells the higher level API to extract the
270+
# binary data and create a temporary file.
271+
raise FileNotFoundError
272+
273+
def is_resource(self, name):
274+
# Maybe we could do better, but if we can get the data, it's a
275+
# resource. Otherwise it isn't.
276+
path = f'{self.fullname}/{name}'
277+
try:
278+
self.zipimporter.get_data(path)
279+
except OSError:
280+
return False
281+
return True
282+
283+
def contents(self):
284+
# This is a bit convoluted, because fullname will be a module path,
285+
# but _files is a list of file names relative to the top of the
286+
# archive's namespace. We want to compare file paths to find all the
287+
# names of things inside the module represented by fullname. So we
288+
# turn the module path of fullname into a file path relative to the
289+
# top of the archive, and then we iterate through _files looking for
290+
# names inside that "directory".
291+
fullname_path = Path(self.zipimporter.get_filename(self.fullname))
292+
relative_path = fullname_path.relative_to(self.zipimporter.archive)
293+
# Don't forget that fullname names a package, so its path will include
294+
# __init__.py, which we want to ignore.
295+
assert relative_path.name == '__init__.py'
296+
package_path = relative_path.parent
297+
subdirs_seen = set()
298+
for filename in self.zipimporter._files:
299+
try:
300+
relative = Path(filename).relative_to(package_path)
301+
except ValueError:
297302
continue
298-
subparts = path.parts[len(relpath.parts):]
299-
if len(subparts) == 1:
300-
yield subparts[0]
301-
elif len(subparts) > 1:
302-
subdir = subparts[0]
303-
if subdir not in subdirs_seen:
304-
subdirs_seen.add(subdir)
305-
yield subdir
303+
# If the path of the file (which is relative to the top of the zip
304+
# namespace), relative to the package given when the resource
305+
# reader was created, has a parent, then it's a name in a
306+
# subdirectory, so we yield that directory's name (only once) instead.
307+
parent_name = relative.parent.name
308+
if len(parent_name) == 0:
309+
yield relative.name
310+
elif parent_name not in subdirs_seen:
311+
subdirs_seen.add(parent_name)
312+
yield parent_name
313+
314+
315+
def _zipimport_get_resource_reader(zipimporter, fullname):
316+
try:
317+
if not zipimporter.is_package(fullname):
318+
return None
319+
except ZipImportError:
320+
return None
321+
return _ZipImportResourceReader(zipimporter, fullname)
Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,6 @@
1-
Add :class:`importlib.abc.ResourceReader` as an ABC to provide a
2-
unified API for reading resources contained within packages. Loaders
3-
wishing to support resource reading are expected to implement the
4-
``get_resource_reader(fullname)`` method.
5-
6-
Also add :mod:`importlib.resources` as the stdlib port of the
7-
``importlib_resources`` PyPI package. The modules provides a high-level
8-
API for end-users to read resources in a nicer fashion than having to
9-
directly interact with low-level details such as loaders.
10-
11-
Thanks to this work, :class:`importlib.abc.ResourceLoader` has now
12-
been documented as deprecated due to its under-specified nature and
13-
lack of features as provided by :class:`importlib.abc.ResourceReader`.
1+
Add :mod:`importlib.resources` and :class:`importlib.abc.ResourceReader` as
2+
the unified API for reading resources contained within packages. Loaders
3+
wishing to support resource reading must implement the
4+
:meth:`get_resource_reader()` method. File-based and zipimport-based loaders
5+
both implement these APIs. :class:`importlib.abc.ResourceLoader` is
6+
deprecated in favor of these new APIs.

Modules/clinic/zipimport.c.h

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,4 +291,35 @@ zipimport_zipimporter_get_source(ZipImporter *self, PyObject *arg)
291291
exit:
292292
return return_value;
293293
}
294-
/*[clinic end generated code: output=93cb62a3a9752b9f input=a9049054013a1b77]*/
294+
295+
PyDoc_STRVAR(zipimport_zipimporter_get_resource_reader__doc__,
296+
"get_resource_reader($self, fullname, /)\n"
297+
"--\n"
298+
"\n"
299+
"Return the ResourceReader for a package in a zip file.\n"
300+
"\n"
301+
"If \'fullname\' is a package within the zip file, return the \'ResourceReader\'\n"
302+
"object for the package. Otherwise return None.");
303+
304+
#define ZIPIMPORT_ZIPIMPORTER_GET_RESOURCE_READER_METHODDEF \
305+
{"get_resource_reader", (PyCFunction)zipimport_zipimporter_get_resource_reader, METH_O, zipimport_zipimporter_get_resource_reader__doc__},
306+
307+
static PyObject *
308+
zipimport_zipimporter_get_resource_reader_impl(ZipImporter *self,
309+
PyObject *fullname);
310+
311+
static PyObject *
312+
zipimport_zipimporter_get_resource_reader(ZipImporter *self, PyObject *arg)
313+
{
314+
PyObject *return_value = NULL;
315+
PyObject *fullname;
316+
317+
if (!PyArg_Parse(arg, "U:get_resource_reader", &fullname)) {
318+
goto exit;
319+
}
320+
return_value = zipimport_zipimporter_get_resource_reader_impl(self, fullname);
321+
322+
exit:
323+
return return_value;
324+
}
325+
/*[clinic end generated code: output=0b57adfe21373512 input=a9049054013a1b77]*/

Modules/zipimport.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -784,6 +784,35 @@ zipimport_zipimporter_get_source_impl(ZipImporter *self, PyObject *fullname)
784784
Py_RETURN_NONE;
785785
}
786786

787+
/*[clinic input]
788+
zipimport.zipimporter.get_resource_reader
789+
790+
fullname: unicode
791+
/
792+
793+
Return the ResourceReader for a package in a zip file.
794+
795+
If 'fullname' is a package within the zip file, return the 'ResourceReader'
796+
object for the package. Otherwise return None.
797+
798+
[clinic start generated code]*/
799+
800+
static PyObject *
801+
zipimport_zipimporter_get_resource_reader_impl(ZipImporter *self,
802+
PyObject *fullname)
803+
/*[clinic end generated code: output=5e367d431f830726 input=bfab94d736e99151]*/
804+
{
805+
PyObject *module = PyImport_ImportModule("importlib.resources");
806+
if (module == NULL) {
807+
return NULL;
808+
}
809+
PyObject *retval = PyObject_CallMethod(
810+
module, "_zipimport_get_resource_reader",
811+
"OO", (PyObject *)self, fullname);
812+
Py_DECREF(module);
813+
return retval;
814+
}
815+
787816

788817
static PyMethodDef zipimporter_methods[] = {
789818
ZIPIMPORT_ZIPIMPORTER_FIND_MODULE_METHODDEF
@@ -794,6 +823,7 @@ static PyMethodDef zipimporter_methods[] = {
794823
ZIPIMPORT_ZIPIMPORTER_GET_DATA_METHODDEF
795824
ZIPIMPORT_ZIPIMPORTER_GET_CODE_METHODDEF
796825
ZIPIMPORT_ZIPIMPORTER_GET_SOURCE_METHODDEF
826+
ZIPIMPORT_ZIPIMPORTER_GET_RESOURCE_READER_METHODDEF
797827
{NULL, NULL} /* sentinel */
798828
};
799829

0 commit comments

Comments
 (0)