12
12
from typing import Iterator , Optional , Set , Union # noqa: F401
13
13
from typing import cast
14
14
from typing .io import BinaryIO , TextIO
15
- from zipfile import ZipFile
15
+ from zipimport import ZipImportError
16
16
17
17
18
18
Package = Union [str , ModuleType ]
@@ -216,38 +216,7 @@ def is_resource(package: Package, name: str) -> bool:
216
216
# contents doesn't necessarily mean it's a resource. Directories are not
217
217
# resources, so let's try to find out if it's a directory or not.
218
218
path = Path (package .__spec__ .origin ).parent / name
219
- if path .is_file ():
220
- return True
221
- if path .is_dir ():
222
- return False
223
- # If it's not a file and it's not a directory, what is it? Well, this
224
- # means the file doesn't exist on the file system, so it probably lives
225
- # inside a zip file. We have to crack open the zip, look at its table of
226
- # contents, and make sure that this entry doesn't have sub-entries.
227
- archive_path = package .__spec__ .loader .archive # type: ignore
228
- package_directory = Path (package .__spec__ .origin ).parent
229
- with ZipFile (archive_path ) as zf :
230
- toc = zf .namelist ()
231
- relpath = package_directory .relative_to (archive_path )
232
- candidate_path = relpath / name
233
- for entry in toc :
234
- try :
235
- relative_to_candidate = Path (entry ).relative_to (candidate_path )
236
- except ValueError :
237
- # The two paths aren't relative to each other so we can ignore it.
238
- continue
239
- # Since directories aren't explicitly listed in the zip file, we must
240
- # infer their 'directory-ness' by looking at the number of path
241
- # components in the path relative to the package resource we're
242
- # looking up. If there are zero additional parts, it's a file, i.e. a
243
- # resource. If there are more than zero it's a directory, i.e. not a
244
- # resource. It has to be one of these two cases.
245
- return len (relative_to_candidate .parts ) == 0
246
- # I think it's impossible to get here. It would mean that we are looking
247
- # for a resource in a zip file, there's an entry matching it in the return
248
- # value of contents(), but we never actually found it in the zip's table of
249
- # contents.
250
- raise AssertionError ('Impossible situation' )
219
+ return path .is_file ()
251
220
252
221
253
222
def contents (package : Package ) -> Iterator [str ]:
@@ -268,38 +237,85 @@ def contents(package: Package) -> Iterator[str]:
268
237
not package .__spec__ .has_location ):
269
238
return []
270
239
package_directory = Path (package .__spec__ .origin ).parent
271
- try :
272
- yield from os .listdir (str (package_directory ))
273
- except (NotADirectoryError , FileNotFoundError ):
274
- # The package is probably in a zip file.
275
- archive_path = getattr (package .__spec__ .loader , 'archive' , None )
276
- if archive_path is None :
277
- raise
278
- relpath = package_directory .relative_to (archive_path )
279
- with ZipFile (archive_path ) as zf :
280
- toc = zf .namelist ()
281
- subdirs_seen = set () # type: Set
282
- for filename in toc :
283
- path = Path (filename )
284
- # Strip off any path component parts that are in common with the
285
- # package directory, relative to the zip archive's file system
286
- # path. This gives us all the parts that live under the named
287
- # package inside the zip file. If the length of these subparts is
288
- # exactly 1, then it is situated inside the package. The resulting
289
- # length will be 0 if it's above the package, and it will be
290
- # greater than 1 if it lives in a subdirectory of the package
291
- # directory.
292
- #
293
- # However, since directories themselves don't appear in the zip
294
- # archive as a separate entry, we need to return the first path
295
- # component for any case that has > 1 subparts -- but only once!
296
- if path .parts [:len (relpath .parts )] != relpath .parts :
240
+ yield from os .listdir (str (package_directory ))
241
+
242
+
243
+ # Private implementation of ResourceReader and get_resource_reader() for
244
+ # zipimport. Don't use these directly! We're implementing these in Python
245
+ # because 1) it's easier, 2) zipimport will likely get rewritten in Python
246
+ # itself at some point, so doing this all in C would just be a waste of
247
+ # effort.
248
+
249
class _ZipImportResourceReader(resources_abc.ResourceReader):
    """Private class used to support ZipImport.get_resource_reader().

    This class is allowed to reference all the innards and private parts of
    the zipimporter.

    Instances pair a zipimporter with the dotted name of one package and
    answer resource queries for that package only.
    """

    def __init__(self, zipimporter, fullname):
        """Remember the importer and the dotted package name to serve."""
        self.zipimporter = zipimporter
        self.fullname = fullname

    def _archive_path(self, name):
        """Return the path handed to ``get_data()`` for *name* in the package."""
        # fullname is a dotted module path ('pkg.sub'), while the archive is
        # addressed by file path ('pkg/sub/...'), so the dots must become
        # slashes or sub-package resources can never be found.
        package_dir = self.fullname.replace('.', '/')
        return f'{package_dir}/{name}'

    def open_resource(self, resource):
        """Return a binary stream over the bytes of *resource*.

        Raises FileNotFoundError (carrying the looked-up path) when the
        zipimporter cannot supply the data.
        """
        path = self._archive_path(resource)
        try:
            return BytesIO(self.zipimporter.get_data(path))
        except OSError as error:
            raise FileNotFoundError(path) from error

    def resource_path(self, resource):
        """Always raise FileNotFoundError; see the comment below for why."""
        # All resources are in the zip file, so there is no path to the file.
        # Raising FileNotFoundError tells the higher level API to extract the
        # binary data and create a temporary file.
        raise FileNotFoundError(resource)

    def is_resource(self, name):
        """Return True if data for *name* can be read from the package."""
        # Maybe we could do better, but if we can get the data, it's a
        # resource. Otherwise it isn't.
        try:
            self.zipimporter.get_data(self._archive_path(name))
        except OSError:
            return False
        return True

    def contents(self):
        """Yield each name directly inside the package, once.

        Files yield their own name; anything nested deeper yields the name
        of the package's immediate subdirectory that contains it.
        """
        # This is a bit convoluted, because fullname will be a module path,
        # but _files is a list of file names relative to the top of the
        # archive's namespace. We want to compare file paths to find all the
        # names of things inside the module represented by fullname. So we
        # turn the module path of fullname into a file path relative to the
        # top of the archive, and then we iterate through _files looking for
        # names inside that "directory".
        fullname_path = Path(self.zipimporter.get_filename(self.fullname))
        relative_path = fullname_path.relative_to(self.zipimporter.archive)
        # Don't forget that fullname names a package, so its path will include
        # __init__.py, which we want to ignore.
        assert relative_path.name == '__init__.py'
        package_path = relative_path.parent
        names_seen = set()
        for filename in self.zipimporter._files:
            try:
                relative = Path(filename).relative_to(package_path)
            except ValueError:
                # Not located under this package at all.
                continue
            if not relative.parts:
                # The entry is the package directory itself; it has no name
                # to report.
                continue
            # The first component is always the direct child of the package,
            # however deeply the entry is nested below it.
            name = relative.parts[0]
            if name not in names_seen:
                names_seen.add(name)
                yield name
313
+
314
+
315
def _zipimport_get_resource_reader(zipimporter, fullname):
    """Return a resource reader for package *fullname*, or None.

    None is returned when ``zipimporter.is_package(fullname)`` is false or
    raises ZipImportError; otherwise a _ZipImportResourceReader bound to the
    importer and the name is returned.
    """
    try:
        names_package = zipimporter.is_package(fullname)
    except ZipImportError:
        # The importer could not answer for this name, so there is nothing
        # to read resources from.
        return None
    if names_package:
        return _ZipImportResourceReader(zipimporter, fullname)
    return None
0 commit comments