From 9e60eebc267b1019729a89e7024c39749dfa7b0a Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Sat, 2 Sep 2017 17:26:25 -0700 Subject: [PATCH 01/33] NF: Add ability for ArrayProxy to keep its ImageOpener open, instead of creating a new on on every file access. --- nibabel/analyze.py | 20 ++++++++++++++++---- nibabel/arrayproxy.py | 41 +++++++++++++++++++++++++++++++++++------ 2 files changed, 51 insertions(+), 10 deletions(-) diff --git a/nibabel/analyze.py b/nibabel/analyze.py index 2f8cf03988..68edf926fc 100644 --- a/nibabel/analyze.py +++ b/nibabel/analyze.py @@ -934,7 +934,7 @@ def set_data_dtype(self, dtype): @classmethod @kw_only_meth(1) - def from_file_map(klass, file_map, mmap=True): + def from_file_map(klass, file_map, mmap=True, keep_file_open=False): ''' class method to create image from mapping in `file_map `` Parameters @@ -950,6 +950,11 @@ def from_file_map(klass, file_map, mmap=True): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. + keep_file_open: If ``file_like`` is a file name, the default behaviour + is to open a new file handle every time the data is accessed. If + this flag is set to `True``, the file handle will be opened on the + first access, and kept open until this ``ArrayProxy`` is garbage- + collected. Returns ------- @@ -964,7 +969,8 @@ def from_file_map(klass, file_map, mmap=True): imgf = img_fh.fileobj if imgf is None: imgf = img_fh.filename - data = klass.ImageArrayProxy(imgf, hdr_copy, mmap=mmap) + data = klass.ImageArrayProxy(imgf, hdr_copy, mmap=mmap, + keep_file_open=keep_file_open) # Initialize without affine to allow header to pass through unmodified img = klass(data, None, header, file_map=file_map) # set affine from header though @@ -976,7 +982,7 @@ def from_file_map(klass, file_map, mmap=True): @classmethod @kw_only_meth(1) - def from_filename(klass, filename, mmap=True): + def from_filename(klass, filename, mmap=True, keep_file_open=False): ''' class method to create image from filename `filename` Parameters @@ -990,6 +996,11 @@ def from_filename(klass, filename, mmap=True): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. + keep_file_open: If ``file_like`` is a file name, the default behaviour + is to open a new file handle every time the data is accessed. If + this flag is set to `True``, the file handle will be opened on the + first access, and kept open until this ``ArrayProxy`` is garbage- + collected. Returns ------- @@ -998,7 +1009,8 @@ def from_filename(klass, filename, mmap=True): if mmap not in (True, False, 'c', 'r'): raise ValueError("mmap should be one of {True, False, 'c', 'r'}") file_map = klass.filespec_to_file_map(filename) - return klass.from_file_map(file_map, mmap=mmap) + return klass.from_file_map(file_map, mmap=mmap, + keep_file_open=keep_file_open) load = from_filename diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index 10a6cab8e6..fe69b5b7b2 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -25,6 +25,8 @@ See :mod:`nibabel.tests.test_proxy_api` for proxy API conformance checks. """ +from contextlib import contextmanager + import numpy as np from .deprecated import deprecate_with_version @@ -69,8 +71,8 @@ class ArrayProxy(object): _header = None @kw_only_meth(2) - def __init__(self, file_like, spec, mmap=True): - """ Initialize array proxy instance + def __init__(self, file_like, spec, mmap=True, keep_file_open=False): + """Initialize array proxy instance Parameters ---------- @@ -99,12 +101,16 @@ def __init__(self, file_like, spec, mmap=True): True gives the same behavior as ``mmap='c'``. If `file_like` cannot be memory-mapped, ignore `mmap` value and read array from file. - scaling : {'fp', 'dv'}, optional, keyword only - Type of scaling to use - see header ``get_data_scaling`` method. + keep_file_open: If ``file_like`` is a file name, the default behaviour + is to open a new file handle every time the data is accessed. If + this flag is set to `True``, the file handle will be opened on the + first access, and kept open until this ``ArrayProxy`` is garbage- + collected. """ if mmap not in (True, False, 'c', 'r'): raise ValueError("mmap should be one of {True, False, 'c', 'r'}") self.file_like = file_like + self._keep_file_open = keep_file_open if hasattr(spec, 'get_data_shape'): slope, inter = spec.get_slope_inter() par = (spec.get_data_shape(), @@ -126,6 +132,15 @@ def __init__(self, file_like, spec, mmap=True): self._dtype = np.dtype(self._dtype) self._mmap = mmap + def __del__(self): + '''If this ``ArrayProxy`` was created with ``keep_file_open=True``, + the open file object is closed if necessary. + ''' + if self._keep_file_open and hasattr(self, '_opener'): + if not self._opener.closed: + self._opener.close() + self._opener = None + @property @deprecate_with_version('ArrayProxy.header deprecated', '2.2', '3.0') def header(self): @@ -155,12 +170,26 @@ def inter(self): def is_proxy(self): return True + @contextmanager + def _get_fileobj(self): + '''Create and return a new ``ImageOpener``, or return an existing one. + one. The specific behaviour depends on the value of the + ``keep_file_open`` flag that was passed to ``__init__``. + ''' + if self._keep_file_open: + if not hasattr(self, '_opener'): + self._opener = ImageOpener(self.file_like) + yield self._opener + else: + with ImageOpener(self.file_like) as opener: + yield opener + def get_unscaled(self): ''' Read of data from file This is an optional part of the proxy API ''' - with ImageOpener(self.file_like) as fileobj: + with self._get_fileobj() as fileobj: raw_data = array_from_file(self._shape, self._dtype, fileobj, @@ -175,7 +204,7 @@ def __array__(self): return apply_read_scaling(raw_data, self._slope, self._inter) def __getitem__(self, slicer): - with ImageOpener(self.file_like) as fileobj: + with self._get_fileobj() as fileobj: raw_data = fileslice(fileobj, slicer, self._shape, From 3d170be693e3b89e01fc231f79659ec2dda3dcac Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Sat, 2 Sep 2017 18:00:02 -0700 Subject: [PATCH 02/33] NF: Use indexed_gzip, if it is present, when opening .gz files in rb mode. --- nibabel/openers.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/nibabel/openers.py b/nibabel/openers.py index abed97afcf..a7a7eeeca2 100644 --- a/nibabel/openers.py +++ b/nibabel/openers.py @@ -60,8 +60,30 @@ def readinto(self, buf): return n_read -def _gzip_open(fileish, *args, **kwargs): - gzip_file = BufferedGzipFile(fileish, *args, **kwargs) +def _gzip_open(fileish, mode='rb', compresslevel=9): + + # is indexed_gzip present? + try: + from indexed_gzip import SafeIndexedGzipFile + have_indexed_gzip = True + except: + have_indexed_gzip = False + + # is this a file? if not we assume it is a string + is_file = hasattr(fileish, 'read') and hasattr(fileish, 'write') and \ + hasattr(fileish, 'mode') + if is_file: + mode = file.mode + + # use indexed_gzip if possible for faster read access + if mode == 'rb' and have_indexed_gzip: + kwargs = {'spacing' : 4194304, 'readbuf_size' : 1048576} + if hasattr(fileish, 'read') and hasattr(fileish, 'write'): + gzip_file = SafeIndexedGzipFile(fid=fileish, **kwargs) + else: + gzip_file = SafeIndexedGzipFile(filename=fileish, **kwargs) + else: + gzip_file = BufferedGzipFile(fileish, mode, compresslevel) # Speedup for #209; attribute not present in in Python 3.5 # open gzip files with faster reads on large files using larger From 32e3b05b119db46b29bd340a15c6bc954a63778d Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Sat, 2 Sep 2017 18:43:17 -0700 Subject: [PATCH 03/33] BF: fileslice.fileslice and read_segments functions optionally accept a threading.Lock to protect seek/read pairs. ArrayProxy creates said lock, and passes it into fileslice calls. Also fixed typo in openers.py from previous commit --- nibabel/arrayproxy.py | 7 +++++-- nibabel/fileslice.py | 31 ++++++++++++++++++++++++------- nibabel/openers.py | 2 +- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index fe69b5b7b2..59c60a60d0 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -26,6 +26,7 @@ See :mod:`nibabel.tests.test_proxy_api` for proxy API conformance checks. """ from contextlib import contextmanager +from threading import Lock import numpy as np @@ -110,7 +111,6 @@ def __init__(self, file_like, spec, mmap=True, keep_file_open=False): if mmap not in (True, False, 'c', 'r'): raise ValueError("mmap should be one of {True, False, 'c', 'r'}") self.file_like = file_like - self._keep_file_open = keep_file_open if hasattr(spec, 'get_data_shape'): slope, inter = spec.get_slope_inter() par = (spec.get_data_shape(), @@ -131,6 +131,8 @@ def __init__(self, file_like, spec, mmap=True, keep_file_open=False): # Permit any specifier that can be interpreted as a numpy dtype self._dtype = np.dtype(self._dtype) self._mmap = mmap + self._keep_file_open = keep_file_open + self._lock = Lock() def __del__(self): '''If this ``ArrayProxy`` was created with ``keep_file_open=True``, @@ -210,7 +212,8 @@ def __getitem__(self, slicer): self._shape, self._dtype, self._offset, - order=self.order) + order=self.order, + lock=self._lock) # Upcast as necessary for big slopes, intercepts return apply_read_scaling(raw_data, self._slope, self._inter) diff --git a/nibabel/fileslice.py b/nibabel/fileslice.py index ee72b83c99..ca392d2549 100644 --- a/nibabel/fileslice.py +++ b/nibabel/fileslice.py @@ -1,6 +1,7 @@ """ Utilities for getting array slices out of file-like objects """ from __future__ import division +from contextlib import contextmanager import operator from numbers import Integral @@ -622,7 +623,7 @@ def slicers2segments(read_slicers, in_shape, offset, itemsize): return all_segments -def read_segments(fileobj, segments, n_bytes): +def read_segments(fileobj, segments, n_bytes, lock=None): """ Read `n_bytes` byte data implied by `segments` from `fileobj` Parameters @@ -634,6 +635,10 @@ def read_segments(fileobj, segments, n_bytes): absolute file offset in bytes and number of bytes to read n_bytes : int total number of bytes that will be read + lock : threading.Lock + If provided, used to ensure that paired calls to ``seek`` and ``read`` + cannot be interrupted by another thread accessing the same ``fileobj``. + Returns ------- @@ -641,22 +646,31 @@ def read_segments(fileobj, segments, n_bytes): object implementing buffer protocol, such as byte string or ndarray or mmap or ctypes ``c_char_array`` """ + # Make a dummy lock-like thing to make the code below a bit nicer + if lock is None: + @contextmanager + def dummy_lock(): + yield + lock = dummy_lock + if len(segments) == 0: if n_bytes != 0: raise ValueError("No segments, but non-zero n_bytes") return b'' if len(segments) == 1: offset, length = segments[0] - fileobj.seek(offset) - bytes = fileobj.read(length) + with lock: + fileobj.seek(offset) + bytes = fileobj.read(length) if len(bytes) != n_bytes: raise ValueError("Whoops, not enough data in file") return bytes # More than one segment bytes = mmap(-1, n_bytes) for offset, length in segments: - fileobj.seek(offset) - bytes.write(fileobj.read(length)) + with lock: + fileobj.seek(offset) + bytes.write(fileobj.read(length)) if bytes.tell() != n_bytes: raise ValueError("Oh dear, n_bytes does not look right") return bytes @@ -700,7 +714,7 @@ def _simple_fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C', def fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C', - heuristic=threshold_heuristic): + heuristic=threshold_heuristic, lock=None): """ Slice array in `fileobj` using `sliceobj` slicer and array definitions `fileobj` contains the contiguous binary data for an array ``A`` of shape, @@ -737,6 +751,9 @@ def fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C', returning one of 'full', 'contiguous', None. See :func:`optimize_slicer` and see :func:`threshold_heuristic` for an example. + lock: threading.Lock, optional + If provided, used to ensure that paired calls to ``seek`` and ``read`` + cannot be interrupted by another thread accessing the same ``fileobj``. Returns ------- @@ -750,7 +767,7 @@ def fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C', segments, sliced_shape, post_slicers = calc_slicedefs( sliceobj, shape, itemsize, offset, order) n_bytes = reduce(operator.mul, sliced_shape, 1) * itemsize - bytes = read_segments(fileobj, segments, n_bytes) + bytes = read_segments(fileobj, segments, n_bytes, lock) sliced = np.ndarray(sliced_shape, dtype, buffer=bytes, order=order) return sliced[post_slicers] diff --git a/nibabel/openers.py b/nibabel/openers.py index a7a7eeeca2..7295f9805c 100644 --- a/nibabel/openers.py +++ b/nibabel/openers.py @@ -73,7 +73,7 @@ def _gzip_open(fileish, mode='rb', compresslevel=9): is_file = hasattr(fileish, 'read') and hasattr(fileish, 'write') and \ hasattr(fileish, 'mode') if is_file: - mode = file.mode + mode = fileish.mode # use indexed_gzip if possible for faster read access if mode == 'rb' and have_indexed_gzip: From 572ab00589463c3a6bfbbc0ff918037ac42e5602 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Sun, 3 Sep 2017 05:37:50 +0100 Subject: [PATCH 04/33] BF: Making all existing unit tests happy --- nibabel/arrayproxy.py | 18 ++++++++++++++---- nibabel/fileslice.py | 11 ++++++----- nibabel/spm99analyze.py | 14 ++++++++++---- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index 59c60a60d0..d1c3ae454b 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -138,10 +138,20 @@ def __del__(self): '''If this ``ArrayProxy`` was created with ``keep_file_open=True``, the open file object is closed if necessary. ''' - if self._keep_file_open and hasattr(self, '_opener'): - if not self._opener.closed: - self._opener.close() - self._opener = None + if hasattr(self, '_opener') and not self._opener.closed: + self._opener.close() + self._opener = None + + def __getstate__(self): + '''Returns the state of this ``ArrayProxy`` during pickling. ''' + state = self.__dict__.copy() + state.pop('_lock', None) + return state + + def __setstate__(self, state): + '''Sets the state of this ``ArrayProxy`` during unpickling. ''' + self.__dict__.update(state) + self._lock = Lock() @property @deprecate_with_version('ArrayProxy.header deprecated', '2.2', '3.0') diff --git a/nibabel/fileslice.py b/nibabel/fileslice.py index ca392d2549..4834872509 100644 --- a/nibabel/fileslice.py +++ b/nibabel/fileslice.py @@ -1,7 +1,6 @@ """ Utilities for getting array slices out of file-like objects """ from __future__ import division -from contextlib import contextmanager import operator from numbers import Integral @@ -648,10 +647,12 @@ def read_segments(fileobj, segments, n_bytes, lock=None): """ # Make a dummy lock-like thing to make the code below a bit nicer if lock is None: - @contextmanager - def dummy_lock(): - yield - lock = dummy_lock + class DummyLock(object): + def __enter__(self): + pass + def __exit__(self, exc_type, exc_val, exc_tb): + return False + lock = DummyLock() if len(segments) == 0: if n_bytes != 0: diff --git a/nibabel/spm99analyze.py b/nibabel/spm99analyze.py index 70b80c5818..122fb1480e 100644 --- a/nibabel/spm99analyze.py +++ b/nibabel/spm99analyze.py @@ -245,8 +245,8 @@ class Spm99AnalyzeImage(analyze.AnalyzeImage): @classmethod @kw_only_meth(1) - def from_file_map(klass, file_map, mmap=True): - ''' class method to create image from mapping in `file_map `` + def from_file_map(klass, file_map, mmap=True, keep_file_open=False): + '''class method to create image from mapping in `file_map `` Parameters ---------- @@ -261,13 +261,19 @@ def from_file_map(klass, file_map, mmap=True): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. + keep_file_open: If the file-ilke(s) is a file name, the default + behaviour is to open a new file handle every time the data is + accessed. If this flag is set to `True``, the file handle will be + opened on the first access, and kept open until this + ``ArrayProxy`` is garbage-collected. Returns ------- img : Spm99AnalyzeImage instance + ''' - ret = super(Spm99AnalyzeImage, klass).from_file_map(file_map, - mmap=mmap) + ret = super(Spm99AnalyzeImage, klass).from_file_map( + file_map, mmap=mmap, keep_file_open=keep_file_open) try: matf = file_map['mat'].get_prepare_fileobj() except IOError: From 7cbbab2acf66bd4d8748f0c75820b7bc6a677de6 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Sun, 3 Sep 2017 13:24:04 +0100 Subject: [PATCH 05/33] TEST: Unit tests for ArrayProxy keep_file_open flag, and handling of internal threading.Lock object. --- nibabel/tests/test_arrayproxy.py | 51 ++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/nibabel/tests/test_arrayproxy.py b/nibabel/tests/test_arrayproxy.py index dfc16fb48e..7930b16964 100644 --- a/nibabel/tests/test_arrayproxy.py +++ b/nibabel/tests/test_arrayproxy.py @@ -12,18 +12,21 @@ import warnings +import pickle from io import BytesIO from ..tmpdirs import InTemporaryDirectory import numpy as np from ..arrayproxy import ArrayProxy, is_proxy, reshape_dataobj +from ..openers import ImageOpener from ..nifti1 import Nifti1Header from numpy.testing import assert_array_equal, assert_array_almost_equal from nose.tools import (assert_true, assert_false, assert_equal, assert_not_equal, assert_raises) from nibabel.testing import VIRAL_MEMMAP +import mock from .test_fileslice import slicer_samples @@ -327,3 +330,51 @@ def check_mmap(hdr, offset, proxy_class, # Check invalid values raise error assert_raises(ValueError, proxy_class, fname, hdr, mmap='rw') assert_raises(ValueError, proxy_class, fname, hdr, mmap='r+') + + +def test_keep_file_open(): + # Test the behaviour of the keep_file_open __init__ flag. + numopeners = [0] + + class CountingImageOpener(ImageOpener): + + def __init__(self, *args, **kwargs): + + super(CountingImageOpener, self).__init__(*args, **kwargs) + numopeners[0] += 1 + + fname = 'testdata' + dtype = np.float32 + data = np.arange(1000, dtype=np.float32).reshape((10, 10, 10)) + with InTemporaryDirectory(): + with open(fname, 'wb') as fobj: + fobj.write(data.tostring(order='F')) + with mock.patch('nibabel.arrayproxy.ImageOpener', CountingImageOpener): + proxy_no_kfp = ArrayProxy(fname, ((10, 10, 10), dtype)) + proxy_kfp = ArrayProxy(fname, ((10, 10, 10), dtype), + keep_file_open=True) + voxels = np.random.randint(0, 10, (10, 3), dtype=np.uint32) + for i in range(voxels.shape[0]): + x , y, z = [int(c) for c in voxels[i, :]] + assert proxy_no_kfp[x, y, z] == x * 100 + y * 10 + z + assert numopeners[0] == i + 1 + numopeners[0] = 0 + for i in range(voxels.shape[0]): + x , y, z = [int(c) for c in voxels[i, :]] + assert proxy_kfp[x, y, z] == x * 100 + y * 10 + z + assert numopeners[0] == 1 + + +def test_pickle_lock(): + # Test that ArrayProxy can be pickled, and that thread lock is created + + def islock(l): + # isinstance doesn't work on threading.Lock? + return hasattr(l, 'acquire') and hasattr(l, 'release') + + proxy = ArrayProxy('dummyfile', ((10, 10, 10), np.float32)) + assert islock(proxy._lock) + pickled = pickle.dumps(proxy) + unpickled = pickle.loads(pickled) + assert islock(unpickled._lock) + assert proxy._lock is not unpickled._lock From 052ec60065a399c26e5af854af5f8e7df9aa3fe7 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Mon, 4 Sep 2017 10:52:50 +0100 Subject: [PATCH 06/33] TEST: Unit test to check thread-safety of fileslice.read_segments --- nibabel/tests/test_fileslice.py | 56 +++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/nibabel/tests/test_fileslice.py b/nibabel/tests/test_fileslice.py index a515d70226..e14c9a414c 100644 --- a/nibabel/tests/test_fileslice.py +++ b/nibabel/tests/test_fileslice.py @@ -8,6 +8,8 @@ from itertools import product from functools import partial from distutils.version import LooseVersion +from threading import Thread, Lock +import time import numpy as np @@ -689,6 +691,60 @@ def test_read_segments(): assert_raises(Exception, read_segments, fobj, [(0, 100), (100, 200)], 199) +def test_read_segments_lock(): + # Test read_segment locking with multiple threads + fobj = BytesIO() + arr = np.random.randint(0, 256, 1000, dtype=np.uint8) + fobj.write(arr.tostring()) + + # Encourage the interpreter to switch threads between a seek/read pair + def yielding_read(*args, **kwargs): + time.sleep(0.001) + return fobj._real_read(*args, **kwargs) + + fobj._real_read = fobj.read + fobj.read = yielding_read + + # Generate some random array segments to read from the file + def random_segments(nsegs): + segs = [] + nbytes = 0 + + for i in range(nsegs): + seglo = np.random.randint(0, 998) + seghi = np.random.randint(seglo + 1, 1000) + seglen = seghi - seglo + nbytes += seglen + segs.append([seglo, seglen]) + + return segs, nbytes + + # Get the data that should be returned for the given segments + def get_expected(segs): + segs = [arr[off:off + length] for off, length in segs] + return np.concatenate(segs) + + # Read from the file, check the result. We do this task simultaneously in + # many threads. Each thread that passes adds 1 to numpassed[0] + numpassed = [0] + lock = Lock() + + def runtest(): + seg, nbytes = random_segments(1) + expected = get_expected(seg) + _check_bytes(read_segments(fobj, seg, nbytes, lock), expected) + + seg, nbytes = random_segments(10) + expected = get_expected(seg) + _check_bytes(read_segments(fobj, seg, nbytes, lock), expected) + numpassed[0] += 1 + + threads = [Thread(target=runtest) for i in range(100)] + [t.start() for t in threads] + [t.join() for t in threads] + assert numpassed[0] == len(threads) + + def _check_slicer(sliceobj, arr, fobj, offset, order, heuristic=threshold_heuristic): new_slice = fileslice(fobj, sliceobj, arr.shape, arr.dtype, offset, order, From 172bb612c73ecbc86fb306cf9315c40d7c16ee84 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Mon, 4 Sep 2017 10:58:09 +0100 Subject: [PATCH 07/33] DOC: Clarification in read_segments doc, and inline comments in openers._gzip_open function --- nibabel/fileslice.py | 7 ++++++- nibabel/openers.py | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/nibabel/fileslice.py b/nibabel/fileslice.py index 4834872509..0a00a989aa 100644 --- a/nibabel/fileslice.py +++ b/nibabel/fileslice.py @@ -637,7 +637,9 @@ def read_segments(fileobj, segments, n_bytes, lock=None): lock : threading.Lock If provided, used to ensure that paired calls to ``seek`` and ``read`` cannot be interrupted by another thread accessing the same ``fileobj``. - + Each thread which accesses the same file via ``read_segments`` must + share a lock in order to ensure that the file access is thread-safe. + A lock does not need to be provided for single-threaded access. Returns ------- @@ -647,11 +649,14 @@ def read_segments(fileobj, segments, n_bytes, lock=None): """ # Make a dummy lock-like thing to make the code below a bit nicer if lock is None: + class DummyLock(object): def __enter__(self): pass + def __exit__(self, exc_type, exc_val, exc_tb): return False + lock = DummyLock() if len(segments) == 0: diff --git a/nibabel/openers.py b/nibabel/openers.py index 7295f9805c..63c5604b48 100644 --- a/nibabel/openers.py +++ b/nibabel/openers.py @@ -72,6 +72,8 @@ def _gzip_open(fileish, mode='rb', compresslevel=9): # is this a file? if not we assume it is a string is_file = hasattr(fileish, 'read') and hasattr(fileish, 'write') and \ hasattr(fileish, 'mode') + + # If we've been given a file object, we can't change its mode. if is_file: mode = fileish.mode @@ -82,6 +84,9 @@ def _gzip_open(fileish, mode='rb', compresslevel=9): gzip_file = SafeIndexedGzipFile(fid=fileish, **kwargs) else: gzip_file = SafeIndexedGzipFile(filename=fileish, **kwargs) + + # Fall-back to built-in GzipFile (wrapped with the BufferedGzipFile class + # defined above) else: gzip_file = BufferedGzipFile(fileish, mode, compresslevel) From bd39417acc9b2ebc447dc758abda2a6e4fbd28fd Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Mon, 4 Sep 2017 11:50:07 +0100 Subject: [PATCH 08/33] TEST: Unit test to make sure that GzipFile/SafeIndexedGzipFile classes are created appropriately --- nibabel/tests/test_openers.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/nibabel/tests/test_openers.py b/nibabel/tests/test_openers.py index 550d0d414d..6481e1c31c 100644 --- a/nibabel/tests/test_openers.py +++ b/nibabel/tests/test_openers.py @@ -20,6 +20,7 @@ from nose.tools import (assert_true, assert_false, assert_equal, assert_not_equal, assert_raises) from ..testing import error_warnings +import mock class Lunk(object): @@ -95,6 +96,35 @@ def test_BinOpener(): BinOpener, 'test.txt', 'r') +def test_Opener_gzip_type(): + # Test that BufferedGzipFile or IndexedGzipFile are used as appropriate + + data = 'this is some test data' + fname = 'test.gz' + mockmod = mock.MagicMock() + + class MockClass(object): + def __init__(self, *args, **kwargs): + pass + + with InTemporaryDirectory(): + + # make some test data + with GzipFile(fname, mode='wb') as f: + f.write(data.encode()) + + # test with indexd_gzip not present + with mock.patch.dict('sys.modules', {'indexed_gzip' : None}): + assert isinstance(Opener(fname, mode='rb').fobj, GzipFile) + assert isinstance(Opener(fname, mode='wb').fobj, GzipFile) + + # test with indexd_gzip present + with mock.patch.dict('sys.modules', {'indexed_gzip' : mockmod}), \ + mock.patch('indexed_gzip.SafeIndexedGzipFile', MockClass): + assert isinstance(Opener(fname, mode='rb').fobj, MockClass) + assert isinstance(Opener(fname, mode='wb').fobj, GzipFile) + + class TestImageOpener: def setUp(self): From cc73dd6fd0cd4d4cf2660b6c38a0758659d8b77c Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Mon, 4 Sep 2017 14:38:47 +0100 Subject: [PATCH 09/33] BF: Typo in _gzip_open was preventing max_read_chunk from having any effect. --- nibabel/openers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nibabel/openers.py b/nibabel/openers.py index 63c5604b48..c04543bc4d 100644 --- a/nibabel/openers.py +++ b/nibabel/openers.py @@ -90,10 +90,10 @@ def _gzip_open(fileish, mode='rb', compresslevel=9): else: gzip_file = BufferedGzipFile(fileish, mode, compresslevel) - # Speedup for #209; attribute not present in in Python 3.5 - # open gzip files with faster reads on large files using larger - # See https://github.com/nipy/nibabel/pull/210 for discussion - if hasattr(gzip_file, 'max_chunk_read'): + # Speedup for #209, for versions of python < 3.5. Open gzip files with + # faster reads on large files using a larger read buffer. See + # https://github.com/nipy/nibabel/pull/210 for discussion + if hasattr(gzip_file, 'max_read_chunk'): gzip_file.max_read_chunk = GZIP_MAX_READ_CHUNK return gzip_file From 9ceaf7f32926dfd8f8453847ee00a31d29ee2686 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Tue, 5 Sep 2017 21:47:11 +0100 Subject: [PATCH 10/33] TEST: Another fix to existing unit test --- nibabel/tests/test_openers.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nibabel/tests/test_openers.py b/nibabel/tests/test_openers.py index 6481e1c31c..6b9e61813c 100644 --- a/nibabel/tests/test_openers.py +++ b/nibabel/tests/test_openers.py @@ -232,7 +232,11 @@ class StrictOpener(Opener): if lext != ext: # extension should not be recognized -> file assert_true(isinstance(fobj.fobj, file_class)) elif lext == 'gz': - assert_true(isinstance(fobj.fobj, GzipFile)) + try: + from indexed_gzip import IndexedGzipFile + except ImportError: + IndexedGzipFile = GzipFile + assert_true(isinstance(fobj.fobj, (GzipFile, IndexedGzipFile))) else: assert_true(isinstance(fobj.fobj, BZ2File)) From bf2ad0df04e48cdfca9a181446f238fe4d289e7d Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Wed, 6 Sep 2017 10:15:57 +0100 Subject: [PATCH 11/33] TEST: Compatibility for numpy 1.7. Also some style fixes to make flake8 happier --- nibabel/openers.py | 6 +++--- nibabel/tests/test_arrayproxy.py | 2 +- nibabel/tests/test_fileslice.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/nibabel/openers.py b/nibabel/openers.py index c04543bc4d..609a52ef6d 100644 --- a/nibabel/openers.py +++ b/nibabel/openers.py @@ -70,8 +70,8 @@ def _gzip_open(fileish, mode='rb', compresslevel=9): have_indexed_gzip = False # is this a file? if not we assume it is a string - is_file = hasattr(fileish, 'read') and hasattr(fileish, 'write') and \ - hasattr(fileish, 'mode') + is_file = (hasattr(fileish, 'read') and hasattr(fileish, 'write') and + hasattr(fileish, 'mode') # If we've been given a file object, we can't change its mode. if is_file: @@ -79,7 +79,7 @@ def _gzip_open(fileish, mode='rb', compresslevel=9): # use indexed_gzip if possible for faster read access if mode == 'rb' and have_indexed_gzip: - kwargs = {'spacing' : 4194304, 'readbuf_size' : 1048576} + kwargs = {'spacing':4194304, 'readbuf_size':1048576} if hasattr(fileish, 'read') and hasattr(fileish, 'write'): gzip_file = SafeIndexedGzipFile(fid=fileish, **kwargs) else: diff --git a/nibabel/tests/test_arrayproxy.py b/nibabel/tests/test_arrayproxy.py index 7930b16964..1792152f1b 100644 --- a/nibabel/tests/test_arrayproxy.py +++ b/nibabel/tests/test_arrayproxy.py @@ -353,7 +353,7 @@ def __init__(self, *args, **kwargs): proxy_no_kfp = ArrayProxy(fname, ((10, 10, 10), dtype)) proxy_kfp = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open=True) - voxels = np.random.randint(0, 10, (10, 3), dtype=np.uint32) + voxels = np.random.randint(0, 10, (10, 3)) for i in range(voxels.shape[0]): x , y, z = [int(c) for c in voxels[i, :]] assert proxy_no_kfp[x, y, z] == x * 100 + y * 10 + z diff --git a/nibabel/tests/test_fileslice.py b/nibabel/tests/test_fileslice.py index e14c9a414c..8896f9bf0e 100644 --- a/nibabel/tests/test_fileslice.py +++ b/nibabel/tests/test_fileslice.py @@ -694,7 +694,7 @@ def test_read_segments(): def test_read_segments_lock(): # Test read_segment locking with multiple threads fobj = BytesIO() - arr = np.random.randint(0, 256, 1000, dtype=np.uint8) + arr = np.array(np.random.randint(0, 256, 1000), dtype=np.uint8) fobj.write(arr.tostring()) # Encourage the interpreter to switch threads between a seek/read pair From 388daae26cafcfa6bf159b17fcd8b2bf6c5f94c8 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Wed, 6 Sep 2017 10:17:55 +0100 Subject: [PATCH 12/33] BF: Typo --- nibabel/openers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nibabel/openers.py b/nibabel/openers.py index 609a52ef6d..c2718ecffa 100644 --- a/nibabel/openers.py +++ b/nibabel/openers.py @@ -71,7 +71,7 @@ def _gzip_open(fileish, mode='rb', compresslevel=9): # is this a file? if not we assume it is a string is_file = (hasattr(fileish, 'read') and hasattr(fileish, 'write') and - hasattr(fileish, 'mode') + hasattr(fileish, 'mode')) # If we've been given a file object, we can't change its mode. if is_file: From 382dcf8f1bbf85e95c60a37b172c7ccdea26d425 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Wed, 6 Sep 2017 10:21:39 +0100 Subject: [PATCH 13/33] RF: Make _gzip_open a bit cleaner and simpler. Move the DummyLock out of read_segments, and to the model level. Cleaned up ArrayProxy._get_fileobj docs (and replaced all triple-single quotes with triple-double quotes) --- nibabel/arrayproxy.py | 30 +++++++++++++++++++----------- nibabel/fileslice.py | 27 +++++++++++++++++---------- nibabel/openers.py | 12 +++++------- 3 files changed, 41 insertions(+), 28 deletions(-) diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index d1c3ae454b..34cc37e8da 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -135,21 +135,21 @@ def __init__(self, file_like, spec, mmap=True, keep_file_open=False): self._lock = Lock() def __del__(self): - '''If this ``ArrayProxy`` was created with ``keep_file_open=True``, + """If this ``ArrayProxy`` was created with ``keep_file_open=True``, the open file object is closed if necessary. - ''' + """ if hasattr(self, '_opener') and not self._opener.closed: self._opener.close() self._opener = None def __getstate__(self): - '''Returns the state of this ``ArrayProxy`` during pickling. ''' + """Returns the state of this ``ArrayProxy`` during pickling. """ state = self.__dict__.copy() state.pop('_lock', None) return state def __setstate__(self, state): - '''Sets the state of this ``ArrayProxy`` during unpickling. ''' + """Sets the state of this ``ArrayProxy`` during unpickling. """ self.__dict__.update(state) self._lock = Lock() @@ -184,10 +184,18 @@ def is_proxy(self): @contextmanager def _get_fileobj(self): - '''Create and return a new ``ImageOpener``, or return an existing one. - one. The specific behaviour depends on the value of the - ``keep_file_open`` flag that was passed to ``__init__``. - ''' + """Create and return a new ``ImageOpener``, or return an existing one. + + The specific behaviour depends on the value of the ``keep_file_open`` + flag that was passed to ``__init__``. + + + Yields + ------ + ImageOpener + A newly created ``ImageOpener`` instance, or an existing one, + which provides access to the file. + """ if self._keep_file_open: if not hasattr(self, '_opener'): self._opener = ImageOpener(self.file_like) @@ -197,10 +205,10 @@ def _get_fileobj(self): yield opener def get_unscaled(self): - ''' Read of data from file + """ Read of data from file This is an optional part of the proxy API - ''' + """ with self._get_fileobj() as fileobj: raw_data = array_from_file(self._shape, self._dtype, @@ -228,7 +236,7 @@ def __getitem__(self, slicer): return apply_read_scaling(raw_data, self._slope, self._inter) def reshape(self, shape): - ''' Return an ArrayProxy with a new shape, without modifying data ''' + """ Return an ArrayProxy with a new shape, without modifying data """ size = np.prod(self._shape) # Calculate new shape if not fully specified diff --git a/nibabel/fileslice.py b/nibabel/fileslice.py index 0a00a989aa..22ca4917df 100644 --- a/nibabel/fileslice.py +++ b/nibabel/fileslice.py @@ -17,6 +17,21 @@ SKIP_THRESH = 2 ** 8 + +class _NullLock(object): + """The ``_NullLock`` is an object which can be used in place of a + ``threading.Lock`` object, but doesn't actually do anything. + + It is used by the ``read_segments`` function in the event that a + ``Lock`` is not provided by the caller. + """ + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_val, exc_tb): + return False + + def is_fancy(sliceobj): """ Returns True if sliceobj is attempting fancy indexing @@ -647,17 +662,9 @@ def read_segments(fileobj, segments, n_bytes, lock=None): object implementing buffer protocol, such as byte string or ndarray or mmap or ctypes ``c_char_array`` """ - # Make a dummy lock-like thing to make the code below a bit nicer + # Make a lock-like thing to make the code below a bit nicer if lock is None: - - class DummyLock(object): - def __enter__(self): - pass - - def __exit__(self, exc_type, exc_val, exc_tb): - return False - - lock = DummyLock() + lock = _NullLock() if len(segments) == 0: if n_bytes != 0: diff --git a/nibabel/openers.py b/nibabel/openers.py index c2718ecffa..231c1f44aa 100644 --- a/nibabel/openers.py +++ b/nibabel/openers.py @@ -70,20 +70,18 @@ def _gzip_open(fileish, mode='rb', compresslevel=9): have_indexed_gzip = False # is this a file? if not we assume it is a string - is_file = (hasattr(fileish, 'read') and hasattr(fileish, 'write') and - hasattr(fileish, 'mode')) + is_file = hasattr(fileish, 'read') and hasattr(fileish, 'write') # If we've been given a file object, we can't change its mode. - if is_file: + if is_file and hasattr(fileish, 'mode'): mode = fileish.mode # use indexed_gzip if possible for faster read access if mode == 'rb' and have_indexed_gzip: - kwargs = {'spacing':4194304, 'readbuf_size':1048576} - if hasattr(fileish, 'read') and hasattr(fileish, 'write'): - gzip_file = SafeIndexedGzipFile(fid=fileish, **kwargs) + if is_file: + gzip_file = SafeIndexedGzipFile(fid=fileish) else: - gzip_file = SafeIndexedGzipFile(filename=fileish, **kwargs) + gzip_file = SafeIndexedGzipFile(filename=fileish) # Fall-back to built-in GzipFile (wrapped with the BufferedGzipFile class # defined above) From 89a2d06e80297ea332b517fe4d036f3f39e55fde Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Wed, 6 Sep 2017 13:45:45 +0100 Subject: [PATCH 14/33] BF: ArrayProxy was closing pre-opened file handle when keep_file_open was True. --- nibabel/arrayproxy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index 34cc37e8da..124a231a47 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -139,7 +139,7 @@ def __del__(self): the open file object is closed if necessary. """ if hasattr(self, '_opener') and not self._opener.closed: - self._opener.close() + self._opener.close_if_mine() self._opener = None def __getstate__(self): From a6972c00566c2d4b8ecd35b7385ced110acc8a5d Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Wed, 6 Sep 2017 13:46:24 +0100 Subject: [PATCH 15/33] TEST: Unit test to make sure that keep_file_open works for pre-opened file objects --- nibabel/tests/test_arrayproxy.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/nibabel/tests/test_arrayproxy.py b/nibabel/tests/test_arrayproxy.py index 1792152f1b..c23495f03f 100644 --- a/nibabel/tests/test_arrayproxy.py +++ b/nibabel/tests/test_arrayproxy.py @@ -335,7 +335,6 @@ def check_mmap(hdr, offset, proxy_class, def test_keep_file_open(): # Test the behaviour of the keep_file_open __init__ flag. numopeners = [0] - class CountingImageOpener(ImageOpener): def __init__(self, *args, **kwargs): @@ -345,24 +344,46 @@ def __init__(self, *args, **kwargs): fname = 'testdata' dtype = np.float32 - data = np.arange(1000, dtype=np.float32).reshape((10, 10, 10)) + data = np.arange(1000, dtype=dtype).reshape((10, 10, 10)) + voxels = np.random.randint(0, 10, (10, 3)) with InTemporaryDirectory(): with open(fname, 'wb') as fobj: fobj.write(data.tostring(order='F')) + # Test that ArrayProxy(keep_file_open=True) only creates one file + # handle, and that ArrayProxy(keep_file_open=False) creates a file + # handle on every data access. with mock.patch('nibabel.arrayproxy.ImageOpener', CountingImageOpener): proxy_no_kfp = ArrayProxy(fname, ((10, 10, 10), dtype)) - proxy_kfp = ArrayProxy(fname, ((10, 10, 10), dtype), - keep_file_open=True) - voxels = np.random.randint(0, 10, (10, 3)) for i in range(voxels.shape[0]): x , y, z = [int(c) for c in voxels[i, :]] assert proxy_no_kfp[x, y, z] == x * 100 + y * 10 + z assert numopeners[0] == i + 1 numopeners[0] = 0 + proxy_kfp = ArrayProxy(fname, ((10, 10, 10), dtype), + keep_file_open=True) for i in range(voxels.shape[0]): x , y, z = [int(c) for c in voxels[i, :]] assert proxy_kfp[x, y, z] == x * 100 + y * 10 + z assert numopeners[0] == 1 + # Test that the keep_file_open flag has no effect if an open file + # handle is passed in + with open(fname, 'rb') as fobj: + proxy_no_kfp = ArrayProxy(fobj, ((10, 10, 10), dtype), + keep_file_open=False) + for i in range(voxels.shape[0]): + assert proxy_no_kfp[x, y, z] == x * 100 + y * 10 + z + assert not fobj.closed + del proxy_no_kfp + proxy_no_kfp = None + assert not fobj.closed + proxy_kfp = ArrayProxy(fobj, ((10, 10, 10), dtype), + keep_file_open=True) + for i in range(voxels.shape[0]): + assert proxy_kfp[x, y, z] == x * 100 + y * 10 + z + assert not fobj.closed + del proxy_kfp + proxy_kfp = None + assert not fobj.closed def test_pickle_lock(): From 0a111caeacb3d4715a617f5593f512381a35a5ed Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Thu, 7 Sep 2017 15:41:41 +0100 Subject: [PATCH 16/33] RF: Adjust the default behaviour of the ArrayProxy keep_file_open flag - if file is gz, and indexed_gzip is True, kfo is set to True. ArrayProxy is also using an RLock instead of a Lock, as I don't think there is a good reason not to. --- nibabel/analyze.py | 36 ++++++++++++++------- nibabel/arrayproxy.py | 70 +++++++++++++++++++++++++++++++++++------ nibabel/fileslice.py | 1 - nibabel/spm99analyze.py | 18 +++++++---- 4 files changed, 96 insertions(+), 29 deletions(-) diff --git a/nibabel/analyze.py b/nibabel/analyze.py index 68edf926fc..9d0c1d9167 100644 --- a/nibabel/analyze.py +++ b/nibabel/analyze.py @@ -934,7 +934,7 @@ def set_data_dtype(self, dtype): @classmethod @kw_only_meth(1) - def from_file_map(klass, file_map, mmap=True, keep_file_open=False): + def from_file_map(klass, file_map, mmap=True, keep_file_open='indexed'): ''' class method to create image from mapping in `file_map `` Parameters @@ -950,11 +950,17 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open=False): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. - keep_file_open: If ``file_like`` is a file name, the default behaviour - is to open a new file handle every time the data is accessed. If - this flag is set to `True``, the file handle will be opened on the - first access, and kept open until this ``ArrayProxy`` is garbage- - collected. + keep_file_open : { 'indexed', True, False }, optional, keyword only + `keep_file_open` controls whether a new file handle is created + every time the image is accessed, or a single file handle is + created and used for the lifetime of this ``ArrayProxy``. If + ``True``, a single file handle is created and used. If ``False``, + a new file handle is created every time the image is accessed. If + ``'indexed'`` (the default), and the optional ``indexed_gzip`` + dependency is present, a single file handle is created and + persisted. If ``indexed_gzip`` is not available, behaviour is the + same as if ``keep_file_open is False``. If ``file_like`` is an + open file handle, this setting has no effect. Returns ------- @@ -982,7 +988,7 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open=False): @classmethod @kw_only_meth(1) - def from_filename(klass, filename, mmap=True, keep_file_open=False): + def from_filename(klass, filename, mmap=True, keep_file_open='indexed'): ''' class method to create image from filename `filename` Parameters @@ -996,11 +1002,17 @@ def from_filename(klass, filename, mmap=True, keep_file_open=False): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. - keep_file_open: If ``file_like`` is a file name, the default behaviour - is to open a new file handle every time the data is accessed. If - this flag is set to `True``, the file handle will be opened on the - first access, and kept open until this ``ArrayProxy`` is garbage- - collected. + keep_file_open : { 'indexed', True, False }, optional, keyword only + `keep_file_open` controls whether a new file handle is created + every time the image is accessed, or a single file handle is + created and used for the lifetime of this ``ArrayProxy``. If + ``True``, a single file handle is created and used. If ``False``, + a new file handle is created every time the image is accessed. If + ``'indexed'`` (the default), and the optional ``indexed_gzip`` + dependency is present, a single file handle is created and + persisted. If ``indexed_gzip`` is not available, behaviour is the + same as if ``keep_file_open is False``. If ``file_like`` is an + open file handle, this setting has no effect. Returns ------- diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index 124a231a47..705dbcbf11 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -26,7 +26,7 @@ See :mod:`nibabel.tests.test_proxy_api` for proxy API conformance checks. """ from contextlib import contextmanager -from threading import Lock +from threading import RLock import numpy as np @@ -72,7 +72,7 @@ class ArrayProxy(object): _header = None @kw_only_meth(2) - def __init__(self, file_like, spec, mmap=True, keep_file_open=False): + def __init__(self, file_like, spec, mmap=True, keep_file_open='indexed'): """Initialize array proxy instance Parameters @@ -102,11 +102,17 @@ def __init__(self, file_like, spec, mmap=True, keep_file_open=False): True gives the same behavior as ``mmap='c'``. If `file_like` cannot be memory-mapped, ignore `mmap` value and read array from file. - keep_file_open: If ``file_like`` is a file name, the default behaviour - is to open a new file handle every time the data is accessed. If - this flag is set to `True``, the file handle will be opened on the - first access, and kept open until this ``ArrayProxy`` is garbage- - collected. + keep_file_open : { 'indexed', True, False }, optional, keyword only + `keep_file_open` controls whether a new file handle is created + every time the image is accessed, or a single file handle is + created and used for the lifetime of this ``ArrayProxy``. If + ``True``, a single file handle is created and used. If ``False``, + a new file handle is created every time the image is accessed. If + ``'indexed'`` (the default), and the optional ``indexed_gzip`` + dependency is present, a single file handle is created and + persisted. If ``indexed_gzip`` is not available, behaviour is the + same as if ``keep_file_open is False``. If ``file_like`` is an + open file handle, this setting has no effect. """ if mmap not in (True, False, 'c', 'r'): raise ValueError("mmap should be one of {True, False, 'c', 'r'}") @@ -131,8 +137,9 @@ def __init__(self, file_like, spec, mmap=True, keep_file_open=False): # Permit any specifier that can be interpreted as a numpy dtype self._dtype = np.dtype(self._dtype) self._mmap = mmap - self._keep_file_open = keep_file_open - self._lock = Lock() + self._keep_file_open = self._should_keep_file_open(file_like, + keep_file_open) + self._lock = RLock() def __del__(self): """If this ``ArrayProxy`` was created with ``keep_file_open=True``, @@ -151,7 +158,50 @@ def __getstate__(self): def __setstate__(self, state): """Sets the state of this ``ArrayProxy`` during unpickling. """ self.__dict__.update(state) - self._lock = Lock() + self._lock = RLock() + + def _should_keep_file_open(self, file_like, keep_file_open): + """Called by ``__init__``, and used to determine the final value of + ``keep_file_open``. + + The return value is derived from these rules: + + - If ``file_like`` is a file(-like) object, ``False`` is returned. + Otherwise, ``file_like`` is assumed to be a file name. + - if ``file_like`` ends with ``'gz'``, and the ``indexed_gzip`` + library is available, ``True`` is returned. + - Otherwise, ``False`` is returned. + + Parameters + ---------- + + file_like : object + File-like object or filename, as passed to ``__init__``. + keep_file_open : { 'indexed', True, False } + Flag as passed to ``__init__``. + + Returns + ------- + + The value of ``keep_file_open`` that will be used by this + ``ArrayProxy``. + """ + + # file_like is a handle - keep_file_open is irrelevant + if hasattr(file_like, 'read') and hasattr(file_like, 'seek'): + return False + # if keep_file_open is True/False, we do what the user wants us to do + if keep_file_open != 'indexed': + return bool(keep_file_open) + # Otherwise, if file_like is gzipped, and we have_indexed_gzip, we set + # keep_file_open to True, else we set it to False + try: + import indexed_gzip + have_indexed_gzip = True + except ImportError: + have_indexed_gzip = False + + return have_indexed_gzip and file_like.endswith('gz') @property @deprecate_with_version('ArrayProxy.header deprecated', '2.2', '3.0') diff --git a/nibabel/fileslice.py b/nibabel/fileslice.py index 22ca4917df..df93930cba 100644 --- a/nibabel/fileslice.py +++ b/nibabel/fileslice.py @@ -17,7 +17,6 @@ SKIP_THRESH = 2 ** 8 - class _NullLock(object): """The ``_NullLock`` is an object which can be used in place of a ``threading.Lock`` object, but doesn't actually do anything. diff --git a/nibabel/spm99analyze.py b/nibabel/spm99analyze.py index 122fb1480e..87069b4efd 100644 --- a/nibabel/spm99analyze.py +++ b/nibabel/spm99analyze.py @@ -245,7 +245,7 @@ class Spm99AnalyzeImage(analyze.AnalyzeImage): @classmethod @kw_only_meth(1) - def from_file_map(klass, file_map, mmap=True, keep_file_open=False): + def from_file_map(klass, file_map, mmap=True, keep_file_open='indexed'): '''class method to create image from mapping in `file_map `` Parameters @@ -261,11 +261,17 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open=False): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. - keep_file_open: If the file-ilke(s) is a file name, the default - behaviour is to open a new file handle every time the data is - accessed. If this flag is set to `True``, the file handle will be - opened on the first access, and kept open until this - ``ArrayProxy`` is garbage-collected. + keep_file_open : { 'indexed', True, False }, optional, keyword only + `keep_file_open` controls whether a new file handle is created + every time the image is accessed, or a single file handle is + created and used for the lifetime of this ``ArrayProxy``. If + ``True``, a single file handle is created and used. If ``False``, + a new file handle is created every time the image is accessed. If + ``'indexed'`` (the default), and the optional ``indexed_gzip`` + dependency is present, a single file handle is created and + persisted. If ``indexed_gzip`` is not available, behaviour is the + same as if ``keep_file_open is False``. If ``file_like`` is an + open file handle, this setting has no effect. Returns ------- From 961f882f06b69ff846c930935c55c6b0d9395bcd Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Thu, 7 Sep 2017 17:14:48 +0100 Subject: [PATCH 17/33] TEST: New test which checks default behaviour of the ArrayProxy keep_file_open flag. Other minor refactorings --- nibabel/tests/test_arrayproxy.py | 52 ++++++++++++++++++++++++++------ nibabel/tests/test_openers.py | 6 ++-- 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/nibabel/tests/test_arrayproxy.py b/nibabel/tests/test_arrayproxy.py index c23495f03f..d7eece601e 100644 --- a/nibabel/tests/test_arrayproxy.py +++ b/nibabel/tests/test_arrayproxy.py @@ -11,6 +11,7 @@ from __future__ import division, print_function, absolute_import import warnings +import gzip import pickle from io import BytesIO @@ -332,16 +333,22 @@ def check_mmap(hdr, offset, proxy_class, assert_raises(ValueError, proxy_class, fname, hdr, mmap='r+') -def test_keep_file_open(): - # Test the behaviour of the keep_file_open __init__ flag. - numopeners = [0] - class CountingImageOpener(ImageOpener): +# An image opener class which counts how many instances of itself have been +# created +class CountingImageOpener(ImageOpener): - def __init__(self, *args, **kwargs): + numOpeners = 0 - super(CountingImageOpener, self).__init__(*args, **kwargs) - numopeners[0] += 1 + def __init__(self, *args, **kwargs): + super(CountingImageOpener, self).__init__(*args, **kwargs) + CountingImageOpener.numOpeners += 1 + + +def test_keep_file_open_true_false(): + # Test the behaviour of the keep_file_open __init__ flag, when it is set to + # True or False. + CountingImageOpener.numOpeners = 0 fname = 'testdata' dtype = np.float32 data = np.arange(1000, dtype=dtype).reshape((10, 10, 10)) @@ -354,22 +361,25 @@ def __init__(self, *args, **kwargs): # handle on every data access. with mock.patch('nibabel.arrayproxy.ImageOpener', CountingImageOpener): proxy_no_kfp = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert not proxy_no_kfp._keep_file_open for i in range(voxels.shape[0]): x , y, z = [int(c) for c in voxels[i, :]] assert proxy_no_kfp[x, y, z] == x * 100 + y * 10 + z - assert numopeners[0] == i + 1 - numopeners[0] = 0 + assert CountingImageOpener.numOpeners == i + 1 + CountingImageOpener.numOpeners = 0 proxy_kfp = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open=True) + assert proxy_kfp._keep_file_open for i in range(voxels.shape[0]): x , y, z = [int(c) for c in voxels[i, :]] assert proxy_kfp[x, y, z] == x * 100 + y * 10 + z - assert numopeners[0] == 1 + assert CountingImageOpener.numOpeners == 1 # Test that the keep_file_open flag has no effect if an open file # handle is passed in with open(fname, 'rb') as fobj: proxy_no_kfp = ArrayProxy(fobj, ((10, 10, 10), dtype), keep_file_open=False) + assert not proxy_no_kfp._keep_file_open for i in range(voxels.shape[0]): assert proxy_no_kfp[x, y, z] == x * 100 + y * 10 + z assert not fobj.closed @@ -378,6 +388,7 @@ def __init__(self, *args, **kwargs): assert not fobj.closed proxy_kfp = ArrayProxy(fobj, ((10, 10, 10), dtype), keep_file_open=True) + assert not proxy_kfp._keep_file_open for i in range(voxels.shape[0]): assert proxy_kfp[x, y, z] == x * 100 + y * 10 + z assert not fobj.closed @@ -386,6 +397,27 @@ def __init__(self, *args, **kwargs): assert not fobj.closed +def test_keep_file_open_default(): + # Test the behaviour of the keep_file_open __init__ flag, when it is set to + # its default value + dtype = np.float32 + data = np.arange(1000, dtype=dtype).reshape((10, 10, 10)) + voxels = np.random.randint(0, 10, (10, 3)) + mockmod = mock.MagicMock() + with InTemporaryDirectory(): + fname = 'testdata.gz' + with gzip.open(fname, 'wb') as fobj: + fobj.write(data.tostring(order='F')) + # If have_indexed_gzip, then keep_file_open should be True + with mock.patch.dict('sys.modules', {'indexed_gzip' : mockmod}), \ + mock.patch('indexed_gzip.SafeIndexedGzipFile', gzip.GzipFile): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert proxy._keep_file_open + # If no have_indexed_gzip, then keep_file_open should be False + with mock.patch.dict('sys.modules', {'indexed_gzip' : None}): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert not proxy._keep_file_open + def test_pickle_lock(): # Test that ArrayProxy can be pickled, and that thread lock is created diff --git a/nibabel/tests/test_openers.py b/nibabel/tests/test_openers.py index 6b9e61813c..52ef26520a 100644 --- a/nibabel/tests/test_openers.py +++ b/nibabel/tests/test_openers.py @@ -103,7 +103,7 @@ def test_Opener_gzip_type(): fname = 'test.gz' mockmod = mock.MagicMock() - class MockClass(object): + class MockIGZFile(object): def __init__(self, *args, **kwargs): pass @@ -120,8 +120,8 @@ def __init__(self, *args, **kwargs): # test with indexd_gzip present with mock.patch.dict('sys.modules', {'indexed_gzip' : mockmod}), \ - mock.patch('indexed_gzip.SafeIndexedGzipFile', MockClass): - assert isinstance(Opener(fname, mode='rb').fobj, MockClass) + mock.patch('indexed_gzip.SafeIndexedGzipFile', MockIGZFile): + assert isinstance(Opener(fname, mode='rb').fobj, MockIGZFile) assert isinstance(Opener(fname, mode='wb').fobj, GzipFile) From cd65af461b9ebc23b33fe87f65f96dda830a485e Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Thu, 7 Sep 2017 17:17:49 +0100 Subject: [PATCH 18/33] RF: Changed default keep_file_open value to 'auto', to make it a bit more general --- nibabel/analyze.py | 12 ++++++------ nibabel/arrayproxy.py | 10 +++++----- nibabel/spm99analyze.py | 6 +++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/nibabel/analyze.py b/nibabel/analyze.py index 9d0c1d9167..cdbec1b446 100644 --- a/nibabel/analyze.py +++ b/nibabel/analyze.py @@ -934,7 +934,7 @@ def set_data_dtype(self, dtype): @classmethod @kw_only_meth(1) - def from_file_map(klass, file_map, mmap=True, keep_file_open='indexed'): + def from_file_map(klass, file_map, mmap=True, keep_file_open='auto'): ''' class method to create image from mapping in `file_map `` Parameters @@ -950,13 +950,13 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open='indexed'): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. - keep_file_open : { 'indexed', True, False }, optional, keyword only + keep_file_open : { 'auto', True, False }, optional, keyword only `keep_file_open` controls whether a new file handle is created every time the image is accessed, or a single file handle is created and used for the lifetime of this ``ArrayProxy``. If ``True``, a single file handle is created and used. If ``False``, a new file handle is created every time the image is accessed. If - ``'indexed'`` (the default), and the optional ``indexed_gzip`` + ``'auto'`` (the default), and the optional ``indexed_gzip`` dependency is present, a single file handle is created and persisted. If ``indexed_gzip`` is not available, behaviour is the same as if ``keep_file_open is False``. If ``file_like`` is an @@ -988,7 +988,7 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open='indexed'): @classmethod @kw_only_meth(1) - def from_filename(klass, filename, mmap=True, keep_file_open='indexed'): + def from_filename(klass, filename, mmap=True, keep_file_open='auto'): ''' class method to create image from filename `filename` Parameters @@ -1002,13 +1002,13 @@ def from_filename(klass, filename, mmap=True, keep_file_open='indexed'): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. - keep_file_open : { 'indexed', True, False }, optional, keyword only + keep_file_open : { 'auto', True, False }, optional, keyword only `keep_file_open` controls whether a new file handle is created every time the image is accessed, or a single file handle is created and used for the lifetime of this ``ArrayProxy``. If ``True``, a single file handle is created and used. If ``False``, a new file handle is created every time the image is accessed. If - ``'indexed'`` (the default), and the optional ``indexed_gzip`` + ``'auto'`` (the default), and the optional ``indexed_gzip`` dependency is present, a single file handle is created and persisted. If ``indexed_gzip`` is not available, behaviour is the same as if ``keep_file_open is False``. If ``file_like`` is an diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index 705dbcbf11..a66582bb6d 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -72,7 +72,7 @@ class ArrayProxy(object): _header = None @kw_only_meth(2) - def __init__(self, file_like, spec, mmap=True, keep_file_open='indexed'): + def __init__(self, file_like, spec, mmap=True, keep_file_open='auto'): """Initialize array proxy instance Parameters @@ -102,13 +102,13 @@ def __init__(self, file_like, spec, mmap=True, keep_file_open='indexed'): True gives the same behavior as ``mmap='c'``. If `file_like` cannot be memory-mapped, ignore `mmap` value and read array from file. - keep_file_open : { 'indexed', True, False }, optional, keyword only + keep_file_open : { 'auto', True, False }, optional, keyword only `keep_file_open` controls whether a new file handle is created every time the image is accessed, or a single file handle is created and used for the lifetime of this ``ArrayProxy``. If ``True``, a single file handle is created and used. If ``False``, a new file handle is created every time the image is accessed. If - ``'indexed'`` (the default), and the optional ``indexed_gzip`` + ``'auto'`` (the default), and the optional ``indexed_gzip`` dependency is present, a single file handle is created and persisted. If ``indexed_gzip`` is not available, behaviour is the same as if ``keep_file_open is False``. If ``file_like`` is an @@ -177,7 +177,7 @@ def _should_keep_file_open(self, file_like, keep_file_open): file_like : object File-like object or filename, as passed to ``__init__``. - keep_file_open : { 'indexed', True, False } + keep_file_open : { 'auto', True, False } Flag as passed to ``__init__``. Returns @@ -191,7 +191,7 @@ def _should_keep_file_open(self, file_like, keep_file_open): if hasattr(file_like, 'read') and hasattr(file_like, 'seek'): return False # if keep_file_open is True/False, we do what the user wants us to do - if keep_file_open != 'indexed': + if keep_file_open != 'auto': return bool(keep_file_open) # Otherwise, if file_like is gzipped, and we have_indexed_gzip, we set # keep_file_open to True, else we set it to False diff --git a/nibabel/spm99analyze.py b/nibabel/spm99analyze.py index 87069b4efd..b67540a840 100644 --- a/nibabel/spm99analyze.py +++ b/nibabel/spm99analyze.py @@ -245,7 +245,7 @@ class Spm99AnalyzeImage(analyze.AnalyzeImage): @classmethod @kw_only_meth(1) - def from_file_map(klass, file_map, mmap=True, keep_file_open='indexed'): + def from_file_map(klass, file_map, mmap=True, keep_file_open='auto'): '''class method to create image from mapping in `file_map `` Parameters @@ -261,13 +261,13 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open='indexed'): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. - keep_file_open : { 'indexed', True, False }, optional, keyword only + keep_file_open : { 'auto', True, False }, optional, keyword only `keep_file_open` controls whether a new file handle is created every time the image is accessed, or a single file handle is created and used for the lifetime of this ``ArrayProxy``. If ``True``, a single file handle is created and used. If ``False``, a new file handle is created every time the image is accessed. If - ``'indexed'`` (the default), and the optional ``indexed_gzip`` + ``'auto'`` (the default), and the optional ``indexed_gzip`` dependency is present, a single file handle is created and persisted. If ``indexed_gzip`` is not available, behaviour is the same as if ``keep_file_open is False``. If ``file_like`` is an From bfc001392579711af5cc37037cdd59d1b5faa304 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Thu, 7 Sep 2017 17:18:05 +0100 Subject: [PATCH 19/33] TEST: Test explicitly passing in value for ArrayProxy keep_file_open parameter --- nibabel/tests/test_arrayproxy.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nibabel/tests/test_arrayproxy.py b/nibabel/tests/test_arrayproxy.py index d7eece601e..7e2e5dd2ec 100644 --- a/nibabel/tests/test_arrayproxy.py +++ b/nibabel/tests/test_arrayproxy.py @@ -413,10 +413,14 @@ def test_keep_file_open_default(): mock.patch('indexed_gzip.SafeIndexedGzipFile', gzip.GzipFile): proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) assert proxy._keep_file_open + proxy = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='auto') + assert proxy._keep_file_open # If no have_indexed_gzip, then keep_file_open should be False with mock.patch.dict('sys.modules', {'indexed_gzip' : None}): proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) assert not proxy._keep_file_open + proxy = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='auto') + assert not proxy._keep_file_open def test_pickle_lock(): # Test that ArrayProxy can be pickled, and that thread lock is created From d9a1ebd26ae169e1ed90afde32da665eb4a8fdd8 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Fri, 8 Sep 2017 09:26:55 +0100 Subject: [PATCH 20/33] TEST: Make list element value change thread safe in fileslice/thread safety test. --- nibabel/tests/test_fileslice.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nibabel/tests/test_fileslice.py b/nibabel/tests/test_fileslice.py index 8896f9bf0e..e9cfe8e0a4 100644 --- a/nibabel/tests/test_fileslice.py +++ b/nibabel/tests/test_fileslice.py @@ -737,7 +737,9 @@ def runtest(): seg, nbytes = random_segments(10) expected = get_expected(seg) _check_bytes(read_segments(fobj, seg, nbytes, lock), expected) - numpassed[0] += 1 + + with lock: + numpassed[0] += 1 threads = [Thread(target=runtest) for i in range(100)] [t.start() for t in threads] From 093ae6e56ad3c6c517cea7d28bfea6c3de8eecaa Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Fri, 8 Sep 2017 09:50:58 +0100 Subject: [PATCH 21/33] RF: Indexed_gzip import test performed once at top of openers.py. Additional lock protection in ArrayProxy, on call to volumeutils.array_from_file. --- nibabel/arrayproxy.py | 12 +++--------- nibabel/openers.py | 16 ++++++++-------- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index a66582bb6d..d16b16576f 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -34,7 +34,7 @@ from .volumeutils import array_from_file, apply_read_scaling from .fileslice import fileslice from .keywordonly import kw_only_meth -from .openers import ImageOpener +from .openers import ImageOpener, HAVE_INDEXED_GZIP class ArrayProxy(object): @@ -195,13 +195,7 @@ def _should_keep_file_open(self, file_like, keep_file_open): return bool(keep_file_open) # Otherwise, if file_like is gzipped, and we have_indexed_gzip, we set # keep_file_open to True, else we set it to False - try: - import indexed_gzip - have_indexed_gzip = True - except ImportError: - have_indexed_gzip = False - - return have_indexed_gzip and file_like.endswith('gz') + return HAVE_INDEXED_GZIP and file_like.endswith('gz') @property @deprecate_with_version('ArrayProxy.header deprecated', '2.2', '3.0') @@ -259,7 +253,7 @@ def get_unscaled(self): This is an optional part of the proxy API """ - with self._get_fileobj() as fileobj: + with self._get_fileobj() as fileobj, self._lock: raw_data = array_from_file(self._shape, self._dtype, fileobj, diff --git a/nibabel/openers.py b/nibabel/openers.py index 231c1f44aa..067d38c409 100644 --- a/nibabel/openers.py +++ b/nibabel/openers.py @@ -14,6 +14,13 @@ import sys from os.path import splitext +# is indexed_gzip present? +try: + from indexed_gzip import SafeIndexedGzipFile + HAVE_INDEXED_GZIP = True +except: + HAVE_INDEXED_GZIP = False + # The largest memory chunk that gzip can use for reads GZIP_MAX_READ_CHUNK = 100 * 1024 * 1024 # 100Mb @@ -62,13 +69,6 @@ def readinto(self, buf): def _gzip_open(fileish, mode='rb', compresslevel=9): - # is indexed_gzip present? - try: - from indexed_gzip import SafeIndexedGzipFile - have_indexed_gzip = True - except: - have_indexed_gzip = False - # is this a file? if not we assume it is a string is_file = hasattr(fileish, 'read') and hasattr(fileish, 'write') @@ -77,7 +77,7 @@ def _gzip_open(fileish, mode='rb', compresslevel=9): mode = fileish.mode # use indexed_gzip if possible for faster read access - if mode == 'rb' and have_indexed_gzip: + if mode == 'rb' and HAVE_INDEXED_GZIP: if is_file: gzip_file = SafeIndexedGzipFile(fid=fileish) else: From d7c7cacb0798a381dc89440d4cf0111cbd74ae73 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Fri, 8 Sep 2017 14:42:50 +0100 Subject: [PATCH 22/33] DOC: Fixed docstrings referring to keep_file_open --- nibabel/analyze.py | 7 +++---- nibabel/spm99analyze.py | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/nibabel/analyze.py b/nibabel/analyze.py index cdbec1b446..e43b4cf03c 100644 --- a/nibabel/analyze.py +++ b/nibabel/analyze.py @@ -959,8 +959,8 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open='auto'): ``'auto'`` (the default), and the optional ``indexed_gzip`` dependency is present, a single file handle is created and persisted. If ``indexed_gzip`` is not available, behaviour is the - same as if ``keep_file_open is False``. If ``file_like`` is an - open file handle, this setting has no effect. + same as if ``keep_file_open is False``. If ``file_map`` refers + to an open file handle, this setting has no effect. Returns ------- @@ -1011,8 +1011,7 @@ def from_filename(klass, filename, mmap=True, keep_file_open='auto'): ``'auto'`` (the default), and the optional ``indexed_gzip`` dependency is present, a single file handle is created and persisted. If ``indexed_gzip`` is not available, behaviour is the - same as if ``keep_file_open is False``. If ``file_like`` is an - open file handle, this setting has no effect. + same as if ``keep_file_open is False``. Returns ------- diff --git a/nibabel/spm99analyze.py b/nibabel/spm99analyze.py index b67540a840..32d4c27b34 100644 --- a/nibabel/spm99analyze.py +++ b/nibabel/spm99analyze.py @@ -270,8 +270,8 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open='auto'): ``'auto'`` (the default), and the optional ``indexed_gzip`` dependency is present, a single file handle is created and persisted. If ``indexed_gzip`` is not available, behaviour is the - same as if ``keep_file_open is False``. If ``file_like`` is an - open file handle, this setting has no effect. + same as if ``keep_file_open is False``. If ``file_map`` refers to + an open file handle, this setting has no effect. Returns ------- From 39dea17a1d09e6962ee87c9ba28ecd595ad7d500 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Fri, 8 Sep 2017 15:04:23 +0100 Subject: [PATCH 23/33] TEST: Update ArrayProxy/Opener tests to work with new HAVE_INDEXED_GZIP flag --- nibabel/tests/test_arrayproxy.py | 12 +++++++----- nibabel/tests/test_openers.py | 11 ++++++++--- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/nibabel/tests/test_arrayproxy.py b/nibabel/tests/test_arrayproxy.py index 7e2e5dd2ec..7caab690fe 100644 --- a/nibabel/tests/test_arrayproxy.py +++ b/nibabel/tests/test_arrayproxy.py @@ -402,21 +402,23 @@ def test_keep_file_open_default(): # its default value dtype = np.float32 data = np.arange(1000, dtype=dtype).reshape((10, 10, 10)) - voxels = np.random.randint(0, 10, (10, 3)) - mockmod = mock.MagicMock() with InTemporaryDirectory(): fname = 'testdata.gz' with gzip.open(fname, 'wb') as fobj: fobj.write(data.tostring(order='F')) # If have_indexed_gzip, then keep_file_open should be True - with mock.patch.dict('sys.modules', {'indexed_gzip' : mockmod}), \ - mock.patch('indexed_gzip.SafeIndexedGzipFile', gzip.GzipFile): + with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', gzip.GzipFile, + create=True): proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) assert proxy._keep_file_open proxy = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='auto') assert proxy._keep_file_open # If no have_indexed_gzip, then keep_file_open should be False - with mock.patch.dict('sys.modules', {'indexed_gzip' : None}): + with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', None, create=True): proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) assert not proxy._keep_file_open proxy = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='auto') diff --git a/nibabel/tests/test_openers.py b/nibabel/tests/test_openers.py index 52ef26520a..e48d06dbf0 100644 --- a/nibabel/tests/test_openers.py +++ b/nibabel/tests/test_openers.py @@ -114,13 +114,18 @@ def __init__(self, *args, **kwargs): f.write(data.encode()) # test with indexd_gzip not present - with mock.patch.dict('sys.modules', {'indexed_gzip' : None}): + with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', None, + create=True): assert isinstance(Opener(fname, mode='rb').fobj, GzipFile) assert isinstance(Opener(fname, mode='wb').fobj, GzipFile) # test with indexd_gzip present - with mock.patch.dict('sys.modules', {'indexed_gzip' : mockmod}), \ - mock.patch('indexed_gzip.SafeIndexedGzipFile', MockIGZFile): + with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', MockIGZFile, + create=True): assert isinstance(Opener(fname, mode='rb').fobj, MockIGZFile) assert isinstance(Opener(fname, mode='wb').fobj, GzipFile) From 82e7c844eb15d5f507aca31d7869fa97eb74ae30 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Sun, 10 Sep 2017 12:25:25 +0100 Subject: [PATCH 24/33] NF+RF: MGHImage from_file_map/from_filename methods passes keep_file_open flag through to ArrayProxy. ArrayProxy handles keep_file_open flag a bit more explicitly/robustly. --- nibabel/arrayproxy.py | 9 ++++++--- nibabel/freesurfer/mghformat.py | 31 +++++++++++++++++++++++++++---- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index d16b16576f..7c25e5f78e 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -186,13 +186,16 @@ def _should_keep_file_open(self, file_like, keep_file_open): The value of ``keep_file_open`` that will be used by this ``ArrayProxy``. """ + # if keep_file_open is True/False, we do what the user wants us to do + if isinstance(keep_file_open, bool): + return keep_file_open + if keep_file_open != 'auto': + raise ValueError( + 'keep_file_open should be one of {\'auto\', True, False }') # file_like is a handle - keep_file_open is irrelevant if hasattr(file_like, 'read') and hasattr(file_like, 'seek'): return False - # if keep_file_open is True/False, we do what the user wants us to do - if keep_file_open != 'auto': - return bool(keep_file_open) # Otherwise, if file_like is gzipped, and we have_indexed_gzip, we set # keep_file_open to True, else we set it to False return HAVE_INDEXED_GZIP and file_like.endswith('gz') diff --git a/nibabel/freesurfer/mghformat.py b/nibabel/freesurfer/mghformat.py index efe51c7d5a..bd53f9c38d 100644 --- a/nibabel/freesurfer/mghformat.py +++ b/nibabel/freesurfer/mghformat.py @@ -476,7 +476,7 @@ def filespec_to_file_map(klass, filespec): @classmethod @kw_only_meth(1) - def from_file_map(klass, file_map, mmap=True): + def from_file_map(klass, file_map, mmap=True, keep_file_open='auto'): '''Load image from `file_map` Parameters @@ -491,6 +491,17 @@ def from_file_map(klass, file_map, mmap=True): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. + keep_file_open : { 'auto', True, False }, optional, keyword only + `keep_file_open` controls whether a new file handle is created + every time the image is accessed, or a single file handle is + created and used for the lifetime of this ``ArrayProxy``. If + ``True``, a single file handle is created and used. If ``False``, + a new file handle is created every time the image is accessed. If + ``'auto'`` (the default), and the optional ``indexed_gzip`` + dependency is present, a single file handle is created and + persisted. If ``indexed_gzip`` is not available, behaviour is the + same as if ``keep_file_open is False``. If ``file_map`` refers to + an open file handle, this setting has no effect. ''' if mmap not in (True, False, 'c', 'r'): raise ValueError("mmap should be one of {True, False, 'c', 'r'}") @@ -500,7 +511,8 @@ def from_file_map(klass, file_map, mmap=True): affine = header.get_affine() hdr_copy = header.copy() # Pass original image fileobj / filename to array proxy - data = klass.ImageArrayProxy(img_fh.file_like, hdr_copy, mmap=mmap) + data = klass.ImageArrayProxy(img_fh.file_like, hdr_copy, mmap=mmap, + keep_file_open=keep_file_open) img = klass(data, affine, header, file_map=file_map) img._load_cache = {'header': hdr_copy, 'affine': affine.copy(), @@ -509,7 +521,7 @@ def from_file_map(klass, file_map, mmap=True): @classmethod @kw_only_meth(1) - def from_filename(klass, filename, mmap=True): + def from_filename(klass, filename, mmap=True, keep_file_open='auto'): ''' class method to create image from filename `filename` Parameters @@ -523,6 +535,16 @@ def from_filename(klass, filename, mmap=True): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. + keep_file_open : { 'auto', True, False }, optional, keyword only + `keep_file_open` controls whether a new file handle is created + every time the image is accessed, or a single file handle is + created and used for the lifetime of this ``ArrayProxy``. If + ``True``, a single file handle is created and used. If ``False``, + a new file handle is created every time the image is accessed. If + ``'auto'`` (the default), and the optional ``indexed_gzip`` + dependency is present, a single file handle is created and + persisted. If ``indexed_gzip`` is not available, behaviour is the + same as if ``keep_file_open is False``. Returns ------- @@ -531,7 +553,8 @@ def from_filename(klass, filename, mmap=True): if mmap not in (True, False, 'c', 'r'): raise ValueError("mmap should be one of {True, False, 'c', 'r'}") file_map = klass.filespec_to_file_map(filename) - return klass.from_file_map(file_map, mmap=mmap) + return klass.from_file_map(file_map, mmap=mmap, + keep_file_open=keep_file_open) load = from_filename From 191eb5c5926138e05109b92d25c43b733f5c7557 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Sun, 10 Sep 2017 12:45:18 +0100 Subject: [PATCH 25/33] TEST: Test invalid values for ArrayProxy keep_file_open flag. Add indexed_gzip to CI build matrix --- .travis.yml | 10 +++++++++- nibabel/tests/test_arrayproxy.py | 18 +++++++++++++++--- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index b67cd56c95..1b37064202 100644 --- a/.travis.yml +++ b/.travis.yml @@ -15,6 +15,7 @@ cache: env: global: - DEPENDS="six numpy scipy matplotlib h5py pillow" + - OPTIONAL_DEPENDS="" - PYDICOM=1 - INSTALL_TYPE="setup" - EXTRA_WHEELS="https://5cf40426d9f06eb7461d-6fe47d9331aba7cd62fc36c7196769e4.ssl.cf2.rackcdn.com" @@ -84,6 +85,13 @@ matrix: - python: 3.5 env: - DOC_DOC_TEST=1 + # Run tests with indexed_gzip present + - python: 2.7 + env: + - OPTIONAL_DEPENDS="indexed_gzip" + - python: 3.5 + env: + - OPTIONAL_DEPENDS="indexed_gzip" before_install: - source tools/travis_tools.sh - python -m pip install --upgrade pip @@ -93,7 +101,7 @@ before_install: - python --version # just to check - pip install -U pip wheel # needed at one point - retry pip install nose flake8 mock # always - - pip install $EXTRA_PIP_FLAGS $DEPENDS + - pip install $EXTRA_PIP_FLAGS $DEPENDS $OPTIONAL_DEPENDS # pydicom <= 0.9.8 doesn't install on python 3 - if [ "${TRAVIS_PYTHON_VERSION:0:1}" == "2" ]; then if [ "$PYDICOM" == "1" ]; then diff --git a/nibabel/tests/test_arrayproxy.py b/nibabel/tests/test_arrayproxy.py index 7caab690fe..988e2e85db 100644 --- a/nibabel/tests/test_arrayproxy.py +++ b/nibabel/tests/test_arrayproxy.py @@ -345,7 +345,7 @@ def __init__(self, *args, **kwargs): CountingImageOpener.numOpeners += 1 -def test_keep_file_open_true_false(): +def test_keep_file_open_true_false_invalid(): # Test the behaviour of the keep_file_open __init__ flag, when it is set to # True or False. CountingImageOpener.numOpeners = 0 @@ -360,7 +360,8 @@ def test_keep_file_open_true_false(): # handle, and that ArrayProxy(keep_file_open=False) creates a file # handle on every data access. with mock.patch('nibabel.arrayproxy.ImageOpener', CountingImageOpener): - proxy_no_kfp = ArrayProxy(fname, ((10, 10, 10), dtype)) + proxy_no_kfp = ArrayProxy(fname, ((10, 10, 10), dtype), + keep_file_open=False) assert not proxy_no_kfp._keep_file_open for i in range(voxels.shape[0]): x , y, z = [int(c) for c in voxels[i, :]] @@ -388,13 +389,24 @@ def test_keep_file_open_true_false(): assert not fobj.closed proxy_kfp = ArrayProxy(fobj, ((10, 10, 10), dtype), keep_file_open=True) - assert not proxy_kfp._keep_file_open + assert proxy_kfp._keep_file_open for i in range(voxels.shape[0]): assert proxy_kfp[x, y, z] == x * 100 + y * 10 + z assert not fobj.closed del proxy_kfp proxy_kfp = None assert not fobj.closed + # Test invalid values of keep_file_open + with assert_raises(ValueError): + ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open=0) + with assert_raises(ValueError): + ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open=1) + with assert_raises(ValueError): + ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open=55) + with assert_raises(ValueError): + ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='autob') + with assert_raises(ValueError): + ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='cauto') def test_keep_file_open_default(): From 16f191e73a0dbd9a178ee10c7ccca2e40dc1ce4e Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Wed, 20 Sep 2017 14:39:46 +0100 Subject: [PATCH 26/33] RF: Explicitly catch ImportError, instead of catch-all --- nibabel/openers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nibabel/openers.py b/nibabel/openers.py index 067d38c409..8767eef5c1 100644 --- a/nibabel/openers.py +++ b/nibabel/openers.py @@ -18,7 +18,7 @@ try: from indexed_gzip import SafeIndexedGzipFile HAVE_INDEXED_GZIP = True -except: +except ImportError: HAVE_INDEXED_GZIP = False From aae3417555a09e8d1521b96ac9f3c654d1ecca47 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Sat, 23 Sep 2017 17:09:54 +0100 Subject: [PATCH 27/33] RF: Default keep_file_open value is now False, and can be controlled via a module-level arrayproxy.KEEP_FILE_OPEN_DEFAULT flag --- nibabel/analyze.py | 29 ++++++++++++++++------------- nibabel/arrayproxy.py | 33 +++++++++++++++++++++++++++------ nibabel/freesurfer/mghformat.py | 26 ++++++++++++++------------ nibabel/spm99analyze.py | 13 +++++++------ 4 files changed, 64 insertions(+), 37 deletions(-) diff --git a/nibabel/analyze.py b/nibabel/analyze.py index e43b4cf03c..2644cbe081 100644 --- a/nibabel/analyze.py +++ b/nibabel/analyze.py @@ -934,8 +934,8 @@ def set_data_dtype(self, dtype): @classmethod @kw_only_meth(1) - def from_file_map(klass, file_map, mmap=True, keep_file_open='auto'): - ''' class method to create image from mapping in `file_map `` + def from_file_map(klass, file_map, mmap=True, keep_file_open=None): + '''class method to create image from mapping in `file_map `` Parameters ---------- @@ -956,11 +956,12 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open='auto'): created and used for the lifetime of this ``ArrayProxy``. If ``True``, a single file handle is created and used. If ``False``, a new file handle is created every time the image is accessed. If - ``'auto'`` (the default), and the optional ``indexed_gzip`` - dependency is present, a single file handle is created and - persisted. If ``indexed_gzip`` is not available, behaviour is the - same as if ``keep_file_open is False``. If ``file_map`` refers - to an open file handle, this setting has no effect. + ``'auto'``, and the optional ``indexed_gzip`` dependency is + present, a single file handle is created and persisted. If + ``indexed_gzip`` is not available, behaviour is the same as if + ``keep_file_open is False``. If ``file_map`` refers to an open + file handle, this setting has no effect. The default value is + set to the value of ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT``. Returns ------- @@ -988,8 +989,8 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open='auto'): @classmethod @kw_only_meth(1) - def from_filename(klass, filename, mmap=True, keep_file_open='auto'): - ''' class method to create image from filename `filename` + def from_filename(klass, filename, mmap=True, keep_file_open=None): + '''class method to create image from filename `filename` Parameters ---------- @@ -1002,16 +1003,18 @@ def from_filename(klass, filename, mmap=True, keep_file_open='auto'): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. + keep_file_open : { 'auto', True, False }, optional, keyword only `keep_file_open` controls whether a new file handle is created every time the image is accessed, or a single file handle is created and used for the lifetime of this ``ArrayProxy``. If ``True``, a single file handle is created and used. If ``False``, a new file handle is created every time the image is accessed. If - ``'auto'`` (the default), and the optional ``indexed_gzip`` - dependency is present, a single file handle is created and - persisted. If ``indexed_gzip`` is not available, behaviour is the - same as if ``keep_file_open is False``. + ``'auto'``, and the optional ``indexed_gzip`` dependency is + present, a single file handle is created and persisted. If + ``indexed_gzip`` is not available, behaviour is the same as if + ``keep_file_open is False``. The default value is set to the value + of ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT``. Returns ------- diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index 7c25e5f78e..1a997aa032 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -37,6 +37,24 @@ from .openers import ImageOpener, HAVE_INDEXED_GZIP +KEEP_FILE_OPEN_DEFAULT = False +"""This flag controls whether a new file handle is created every time an image +is accessed through an ``ArrayProxy``, or a single file handle is created and +used for the lifetime of the ``ArrayProxy``. It should be set to one of +``True``, ``False``, or ``'auto'``. + +If ``True``, a single file handle is created and used. If ``False``, a new +file handle is created every time the image is accessed. If ``'auto'``, and +the optional ``indexed_gzip`` dependency is present, a single file handle is +created and persisted. If ``indexed_gzip`` is not available, behaviour is the +same as if ``keep_file_open is False``. + +If this is set to any other value, attempts to create an ``ArrayProxy`` without +specifying the ``keep_file_open`` flag will result in a ``ValueError`` being +raised. +""" + + class ArrayProxy(object): """ Class to act as proxy for the array that can be read from a file @@ -72,7 +90,7 @@ class ArrayProxy(object): _header = None @kw_only_meth(2) - def __init__(self, file_like, spec, mmap=True, keep_file_open='auto'): + def __init__(self, file_like, spec, mmap=True, keep_file_open=None): """Initialize array proxy instance Parameters @@ -108,11 +126,12 @@ def __init__(self, file_like, spec, mmap=True, keep_file_open='auto'): created and used for the lifetime of this ``ArrayProxy``. If ``True``, a single file handle is created and used. If ``False``, a new file handle is created every time the image is accessed. If - ``'auto'`` (the default), and the optional ``indexed_gzip`` - dependency is present, a single file handle is created and - persisted. If ``indexed_gzip`` is not available, behaviour is the - same as if ``keep_file_open is False``. If ``file_like`` is an - open file handle, this setting has no effect. + ``'auto'``, and the optional ``indexed_gzip`` dependency is + present, a single file handle is created and persisted. If + ``indexed_gzip`` is not available, behaviour is the same as if + ``keep_file_open is False``. If ``file_like`` is an open file + handle, this setting has no effect. The default value is set to + the value of ``KEEP_FILE_OPEN_DEFAULT``. """ if mmap not in (True, False, 'c', 'r'): raise ValueError("mmap should be one of {True, False, 'c', 'r'}") @@ -186,6 +205,8 @@ def _should_keep_file_open(self, file_like, keep_file_open): The value of ``keep_file_open`` that will be used by this ``ArrayProxy``. """ + if keep_file_open is None: + keep_file_open = KEEP_FILE_OPEN_DEFAULT # if keep_file_open is True/False, we do what the user wants us to do if isinstance(keep_file_open, bool): return keep_file_open diff --git a/nibabel/freesurfer/mghformat.py b/nibabel/freesurfer/mghformat.py index bd53f9c38d..2dacd64a71 100644 --- a/nibabel/freesurfer/mghformat.py +++ b/nibabel/freesurfer/mghformat.py @@ -476,7 +476,7 @@ def filespec_to_file_map(klass, filespec): @classmethod @kw_only_meth(1) - def from_file_map(klass, file_map, mmap=True, keep_file_open='auto'): + def from_file_map(klass, file_map, mmap=True, keep_file_open=None): '''Load image from `file_map` Parameters @@ -497,11 +497,12 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open='auto'): created and used for the lifetime of this ``ArrayProxy``. If ``True``, a single file handle is created and used. If ``False``, a new file handle is created every time the image is accessed. If - ``'auto'`` (the default), and the optional ``indexed_gzip`` - dependency is present, a single file handle is created and - persisted. If ``indexed_gzip`` is not available, behaviour is the - same as if ``keep_file_open is False``. If ``file_map`` refers to - an open file handle, this setting has no effect. + ``'auto'``, and the optional ``indexed_gzip`` dependency is + present, a single file handle is created and persisted. If + ``indexed_gzip`` is not available, behaviour is the same as if + ``keep_file_open is False``. If ``file_map`` refers to an open + file handle, this setting has no effect. The default value is + set to the value of ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT``. ''' if mmap not in (True, False, 'c', 'r'): raise ValueError("mmap should be one of {True, False, 'c', 'r'}") @@ -521,8 +522,8 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open='auto'): @classmethod @kw_only_meth(1) - def from_filename(klass, filename, mmap=True, keep_file_open='auto'): - ''' class method to create image from filename `filename` + def from_filename(klass, filename, mmap=True, keep_file_open=None): + '''class method to create image from filename `filename` Parameters ---------- @@ -541,10 +542,11 @@ def from_filename(klass, filename, mmap=True, keep_file_open='auto'): created and used for the lifetime of this ``ArrayProxy``. If ``True``, a single file handle is created and used. If ``False``, a new file handle is created every time the image is accessed. If - ``'auto'`` (the default), and the optional ``indexed_gzip`` - dependency is present, a single file handle is created and - persisted. If ``indexed_gzip`` is not available, behaviour is the - same as if ``keep_file_open is False``. + ``'auto'``, and the optional ``indexed_gzip`` dependency is + present, a single file handle is created and persisted. If + ``indexed_gzip`` is not available, behaviour is the same as if + ``keep_file_open is False``. The default value is set to the value + of ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT``. Returns ------- diff --git a/nibabel/spm99analyze.py b/nibabel/spm99analyze.py index 32d4c27b34..2d9a43d8b9 100644 --- a/nibabel/spm99analyze.py +++ b/nibabel/spm99analyze.py @@ -245,7 +245,7 @@ class Spm99AnalyzeImage(analyze.AnalyzeImage): @classmethod @kw_only_meth(1) - def from_file_map(klass, file_map, mmap=True, keep_file_open='auto'): + def from_file_map(klass, file_map, mmap=True, keep_file_open=None): '''class method to create image from mapping in `file_map `` Parameters @@ -267,11 +267,12 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open='auto'): created and used for the lifetime of this ``ArrayProxy``. If ``True``, a single file handle is created and used. If ``False``, a new file handle is created every time the image is accessed. If - ``'auto'`` (the default), and the optional ``indexed_gzip`` - dependency is present, a single file handle is created and - persisted. If ``indexed_gzip`` is not available, behaviour is the - same as if ``keep_file_open is False``. If ``file_map`` refers to - an open file handle, this setting has no effect. + ``'auto'``, and the optional ``indexed_gzip`` dependency is + present, a single file handle is created and persisted. If + ``indexed_gzip`` is not available, behaviour is the same as if + ``keep_file_open is False``. If ``file_map`` refers to an open + file handle, this setting has no effect. The default value is + set to the value of ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT``. Returns ------- From 900869693fa96a035e60ddab01845dc44dd627af Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Sat, 23 Sep 2017 17:10:52 +0100 Subject: [PATCH 28/33] TEST: Adjusted keep_file_open tests, and test changes to the KEEP_FILE_OPEN_DEFAULT attribute. --- nibabel/tests/test_arrayproxy.py | 70 +++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/nibabel/tests/test_arrayproxy.py b/nibabel/tests/test_arrayproxy.py index 988e2e85db..7d46c3c9e1 100644 --- a/nibabel/tests/test_arrayproxy.py +++ b/nibabel/tests/test_arrayproxy.py @@ -19,7 +19,8 @@ import numpy as np -from ..arrayproxy import ArrayProxy, is_proxy, reshape_dataobj +from ..arrayproxy import (ArrayProxy, KEEP_FILE_OPEN_DEFAULT, is_proxy, + reshape_dataobj) from ..openers import ImageOpener from ..nifti1 import Nifti1Header @@ -409,9 +410,9 @@ def test_keep_file_open_true_false_invalid(): ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='cauto') -def test_keep_file_open_default(): +def test_keep_file_open_auto(): # Test the behaviour of the keep_file_open __init__ flag, when it is set to - # its default value + # 'auto' dtype = np.float32 data = np.arange(1000, dtype=dtype).reshape((10, 10, 10)) with InTemporaryDirectory(): @@ -423,18 +424,75 @@ def test_keep_file_open_default(): mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ mock.patch('nibabel.openers.SafeIndexedGzipFile', gzip.GzipFile, create=True): - proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) - assert proxy._keep_file_open proxy = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='auto') assert proxy._keep_file_open # If no have_indexed_gzip, then keep_file_open should be False + with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', None, create=True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='auto') + assert not proxy._keep_file_open + + +def test_keep_file_open_default(): + # Test the behaviour of the keep_file_open __init__ flag, when the + # arrayproxy.KEEP_FILE_OPEN_DEFAULT value is changed + dtype = np.float32 + data = np.arange(1000, dtype=dtype).reshape((10, 10, 10)) + with InTemporaryDirectory(): + fname = 'testdata.gz' + with gzip.open(fname, 'wb') as fobj: + fobj.write(data.tostring(order='F')) + # The default value of KEEP_FILE_OPEN_DEFAULT should cause keep_file_open + # to be False, regardless of whether or not indexed_gzip is present + assert KEEP_FILE_OPEN_DEFAULT is False with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ mock.patch('nibabel.openers.SafeIndexedGzipFile', None, create=True): proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) assert not proxy._keep_file_open - proxy = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='auto') + with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', gzip.GzipFile, + create=True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert not proxy._keep_file_open + # KEEP_FILE_OPEN_DEFAULT=True should cause keep_file_open to be True, + # regardless of whether or not indexed_gzip is present + with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', True), \ + mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', gzip.GzipFile, + create=True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert proxy._keep_file_open + with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', True), \ + mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', None, create=True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert proxy._keep_file_open + # KEEP_FILE_OPEN_DEFAULT=auto should cause keep_file_open to be True + # or False, depending on whether indeed_gzip is present, + with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', 'auto'), \ + mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', gzip.GzipFile, + create=True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert proxy._keep_file_open + with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', 'auto'), \ + mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', None, create=True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) assert not proxy._keep_file_open + # KEEP_FILE_OPEN_DEFAULT=any other value should cuse an error to be raised + with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', 'badvalue'): + assert_raises(ValueError, ArrayProxy, fname, ((10, 10, 10), dtype)) + with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', None): + assert_raises(ValueError, ArrayProxy, fname, ((10, 10, 10), dtype)) + def test_pickle_lock(): # Test that ArrayProxy can be pickled, and that thread lock is created From ed55620f7507af17f8eb5ecabb09511240d4d157 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Sat, 23 Sep 2017 17:24:27 +0100 Subject: [PATCH 29/33] DOC: Moved attribute docstring --- nibabel/arrayproxy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index 1a997aa032..8704e8c276 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -37,7 +37,6 @@ from .openers import ImageOpener, HAVE_INDEXED_GZIP -KEEP_FILE_OPEN_DEFAULT = False """This flag controls whether a new file handle is created every time an image is accessed through an ``ArrayProxy``, or a single file handle is created and used for the lifetime of the ``ArrayProxy``. It should be set to one of @@ -53,6 +52,7 @@ specifying the ``keep_file_open`` flag will result in a ``ValueError`` being raised. """ +KEEP_FILE_OPEN_DEFAULT = False class ArrayProxy(object): From 0b41c4938ad8b497c77fb36a4025eb853cb2c535 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Sat, 23 Sep 2017 17:24:58 +0100 Subject: [PATCH 30/33] TEST: Fixed indentation in arrayproxy tests. Not that it was having any effect anyway. --- nibabel/tests/test_arrayproxy.py | 134 +++++++++++++++++-------------- 1 file changed, 72 insertions(+), 62 deletions(-) diff --git a/nibabel/tests/test_arrayproxy.py b/nibabel/tests/test_arrayproxy.py index 7d46c3c9e1..6a97ac675a 100644 --- a/nibabel/tests/test_arrayproxy.py +++ b/nibabel/tests/test_arrayproxy.py @@ -419,19 +419,22 @@ def test_keep_file_open_auto(): fname = 'testdata.gz' with gzip.open(fname, 'wb') as fobj: fobj.write(data.tostring(order='F')) - # If have_indexed_gzip, then keep_file_open should be True - with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', gzip.GzipFile, - create=True): - proxy = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='auto') - assert proxy._keep_file_open - # If no have_indexed_gzip, then keep_file_open should be False - with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', None, create=True): - proxy = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='auto') - assert not proxy._keep_file_open + # If have_indexed_gzip, then keep_file_open should be True + with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', gzip.GzipFile, + create=True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype), + keep_file_open='auto') + assert proxy._keep_file_open + # If no have_indexed_gzip, then keep_file_open should be False + with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', None, + create=True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype), + keep_file_open='auto') + assert not proxy._keep_file_open def test_keep_file_open_default(): @@ -443,55 +446,62 @@ def test_keep_file_open_default(): fname = 'testdata.gz' with gzip.open(fname, 'wb') as fobj: fobj.write(data.tostring(order='F')) - # The default value of KEEP_FILE_OPEN_DEFAULT should cause keep_file_open - # to be False, regardless of whether or not indexed_gzip is present - assert KEEP_FILE_OPEN_DEFAULT is False - with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', None, create=True): - proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) - assert not proxy._keep_file_open - with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', gzip.GzipFile, - create=True): - proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) - assert not proxy._keep_file_open - # KEEP_FILE_OPEN_DEFAULT=True should cause keep_file_open to be True, - # regardless of whether or not indexed_gzip is present - with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', True), \ - mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', gzip.GzipFile, - create=True): - proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) - assert proxy._keep_file_open - with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', True), \ - mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', None, create=True): - proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) - assert proxy._keep_file_open - # KEEP_FILE_OPEN_DEFAULT=auto should cause keep_file_open to be True - # or False, depending on whether indeed_gzip is present, - with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', 'auto'), \ - mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', gzip.GzipFile, - create=True): - proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) - assert proxy._keep_file_open - with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', 'auto'), \ - mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', None, create=True): - proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) - assert not proxy._keep_file_open - # KEEP_FILE_OPEN_DEFAULT=any other value should cuse an error to be raised - with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', 'badvalue'): - assert_raises(ValueError, ArrayProxy, fname, ((10, 10, 10), dtype)) - with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', None): - assert_raises(ValueError, ArrayProxy, fname, ((10, 10, 10), dtype)) + # The default value of KEEP_FILE_OPEN_DEFAULT should cause + # keep_file_open to be False, regardless of whether or not indexed_gzip + # is present + assert KEEP_FILE_OPEN_DEFAULT is False + with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', None, + create=True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert not proxy._keep_file_open + with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', gzip.GzipFile, + create=True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert not proxy._keep_file_open + # KEEP_FILE_OPEN_DEFAULT=True should cause keep_file_open to be True, + # regardless of whether or not indexed_gzip is present + with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', True), \ + mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', gzip.GzipFile, + create=True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert proxy._keep_file_open + with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', True), \ + mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', None, + create=True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert proxy._keep_file_open + # KEEP_FILE_OPEN_DEFAULT=auto should cause keep_file_open to be True + # or False, depending on whether indeed_gzip is present, + with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', 'auto'), \ + mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', gzip.GzipFile, + create=True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert proxy._keep_file_open + with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', 'auto'), \ + mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', None, + create=True): + proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) + assert not proxy._keep_file_open + # KEEP_FILE_OPEN_DEFAULT=any other value should cuse an error to be + # raised + with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', 'badval'): + assert_raises(ValueError, ArrayProxy, fname, ((10, 10, 10), + dtype)) + with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', None): + assert_raises(ValueError, ArrayProxy, fname, ((10, 10, 10), + dtype)) def test_pickle_lock(): From dc07879a6ca1f9939740b536fbd4553345cdaf18 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Sun, 24 Sep 2017 14:14:24 +0100 Subject: [PATCH 31/33] DOC+RF: Adjust docs for ArrayProxy keep_file_open flag, and for fileslice thread lock. Variable rename in fileslice to avoid collision with built-in. --- nibabel/analyze.py | 15 ++++++++------- nibabel/arrayproxy.py | 11 +++++------ nibabel/fileslice.py | 21 +++++++++++++++------ nibabel/freesurfer/mghformat.py | 14 ++++++++------ nibabel/spm99analyze.py | 7 ++++--- 5 files changed, 40 insertions(+), 28 deletions(-) diff --git a/nibabel/analyze.py b/nibabel/analyze.py index 2644cbe081..ba1529e3b9 100644 --- a/nibabel/analyze.py +++ b/nibabel/analyze.py @@ -950,7 +950,7 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open=None): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. - keep_file_open : { 'auto', True, False }, optional, keyword only + keep_file_open : { None, 'auto', True, False }, optional, keyword only `keep_file_open` controls whether a new file handle is created every time the image is accessed, or a single file handle is created and used for the lifetime of this ``ArrayProxy``. If @@ -960,8 +960,9 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open=None): present, a single file handle is created and persisted. If ``indexed_gzip`` is not available, behaviour is the same as if ``keep_file_open is False``. If ``file_map`` refers to an open - file handle, this setting has no effect. The default value is - set to the value of ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT``. + file handle, this setting has no effect. The default value + (``None``) will result in the value of + ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT`` being used. Returns ------- @@ -1003,8 +1004,7 @@ def from_filename(klass, filename, mmap=True, keep_file_open=None): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. - - keep_file_open : { 'auto', True, False }, optional, keyword only + keep_file_open : { None, 'auto', True, False }, optional, keyword only `keep_file_open` controls whether a new file handle is created every time the image is accessed, or a single file handle is created and used for the lifetime of this ``ArrayProxy``. If @@ -1013,8 +1013,9 @@ def from_filename(klass, filename, mmap=True, keep_file_open=None): ``'auto'``, and the optional ``indexed_gzip`` dependency is present, a single file handle is created and persisted. If ``indexed_gzip`` is not available, behaviour is the same as if - ``keep_file_open is False``. The default value is set to the value - of ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT``. + ``keep_file_open is False``. The default value (``None``) will + result in the value of + ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT`` being used. Returns ------- diff --git a/nibabel/arrayproxy.py b/nibabel/arrayproxy.py index 8704e8c276..18d7f67e3b 100644 --- a/nibabel/arrayproxy.py +++ b/nibabel/arrayproxy.py @@ -120,7 +120,7 @@ def __init__(self, file_like, spec, mmap=True, keep_file_open=None): True gives the same behavior as ``mmap='c'``. If `file_like` cannot be memory-mapped, ignore `mmap` value and read array from file. - keep_file_open : { 'auto', True, False }, optional, keyword only + keep_file_open : { None, 'auto', True, False }, optional, keyword only `keep_file_open` controls whether a new file handle is created every time the image is accessed, or a single file handle is created and used for the lifetime of this ``ArrayProxy``. If @@ -130,8 +130,8 @@ def __init__(self, file_like, spec, mmap=True, keep_file_open=None): present, a single file handle is created and persisted. If ``indexed_gzip`` is not available, behaviour is the same as if ``keep_file_open is False``. If ``file_like`` is an open file - handle, this setting has no effect. The default value is set to - the value of ``KEEP_FILE_OPEN_DEFAULT``. + handle, this setting has no effect. The default value (``None``) + will result in the value of ``KEEP_FILE_OPEN_DEFAULT`` being used. """ if mmap not in (True, False, 'c', 'r'): raise ValueError("mmap should be one of {True, False, 'c', 'r'}") @@ -211,8 +211,8 @@ def _should_keep_file_open(self, file_like, keep_file_open): if isinstance(keep_file_open, bool): return keep_file_open if keep_file_open != 'auto': - raise ValueError( - 'keep_file_open should be one of {\'auto\', True, False }') + raise ValueError('keep_file_open should be one of {None, ' + '\'auto\', True, False}') # file_like is a handle - keep_file_open is irrelevant if hasattr(file_like, 'read') and hasattr(file_like, 'seek'): @@ -257,7 +257,6 @@ def _get_fileobj(self): The specific behaviour depends on the value of the ``keep_file_open`` flag that was passed to ``__init__``. - Yields ------ ImageOpener diff --git a/nibabel/fileslice.py b/nibabel/fileslice.py index df93930cba..e55f48c127 100644 --- a/nibabel/fileslice.py +++ b/nibabel/fileslice.py @@ -18,7 +18,9 @@ class _NullLock(object): - """The ``_NullLock`` is an object which can be used in place of a + """Can be used as no-function dummy object in place of ``threading.lock``. + + The ``_NullLock`` is an object which can be used in place of a ``threading.Lock`` object, but doesn't actually do anything. It is used by the ``read_segments`` function in the event that a @@ -648,12 +650,14 @@ def read_segments(fileobj, segments, n_bytes, lock=None): absolute file offset in bytes and number of bytes to read n_bytes : int total number of bytes that will be read - lock : threading.Lock + lock : {None, threading.Lock, lock-like} optional If provided, used to ensure that paired calls to ``seek`` and ``read`` cannot be interrupted by another thread accessing the same ``fileobj``. Each thread which accesses the same file via ``read_segments`` must share a lock in order to ensure that the file access is thread-safe. - A lock does not need to be provided for single-threaded access. + A lock does not need to be provided for single-threaded access. The + default value (``None``) results in a lock-like object (a + ``_NullLock``) which does not do anything. Returns ------- @@ -763,9 +767,14 @@ def fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C', returning one of 'full', 'contiguous', None. See :func:`optimize_slicer` and see :func:`threshold_heuristic` for an example. - lock: threading.Lock, optional + lock : {None, threading.Lock, lock-like} optional If provided, used to ensure that paired calls to ``seek`` and ``read`` cannot be interrupted by another thread accessing the same ``fileobj``. + Each thread which accesses the same file via ``read_segments`` must + share a lock in order to ensure that the file access is thread-safe. + A lock does not need to be provided for single-threaded access. The + default value (``None``) results in a lock-like object (a + ``_NullLock``) which does not do anything. Returns ------- @@ -779,8 +788,8 @@ def fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C', segments, sliced_shape, post_slicers = calc_slicedefs( sliceobj, shape, itemsize, offset, order) n_bytes = reduce(operator.mul, sliced_shape, 1) * itemsize - bytes = read_segments(fileobj, segments, n_bytes, lock) - sliced = np.ndarray(sliced_shape, dtype, buffer=bytes, order=order) + arr_data = read_segments(fileobj, segments, n_bytes, lock) + sliced = np.ndarray(sliced_shape, dtype, buffer=arr_data, order=order) return sliced[post_slicers] diff --git a/nibabel/freesurfer/mghformat.py b/nibabel/freesurfer/mghformat.py index 2dacd64a71..1bc9071538 100644 --- a/nibabel/freesurfer/mghformat.py +++ b/nibabel/freesurfer/mghformat.py @@ -491,7 +491,7 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open=None): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. - keep_file_open : { 'auto', True, False }, optional, keyword only + keep_file_open : { None, 'auto', True, False }, optional, keyword only `keep_file_open` controls whether a new file handle is created every time the image is accessed, or a single file handle is created and used for the lifetime of this ``ArrayProxy``. If @@ -501,8 +501,9 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open=None): present, a single file handle is created and persisted. If ``indexed_gzip`` is not available, behaviour is the same as if ``keep_file_open is False``. If ``file_map`` refers to an open - file handle, this setting has no effect. The default value is - set to the value of ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT``. + file handle, this setting has no effect. The default value + (``None``) will result in the value of + ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT`` being used. ''' if mmap not in (True, False, 'c', 'r'): raise ValueError("mmap should be one of {True, False, 'c', 'r'}") @@ -536,7 +537,7 @@ def from_filename(klass, filename, mmap=True, keep_file_open=None): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. - keep_file_open : { 'auto', True, False }, optional, keyword only + keep_file_open : { None, 'auto', True, False }, optional, keyword only `keep_file_open` controls whether a new file handle is created every time the image is accessed, or a single file handle is created and used for the lifetime of this ``ArrayProxy``. If @@ -545,8 +546,9 @@ def from_filename(klass, filename, mmap=True, keep_file_open=None): ``'auto'``, and the optional ``indexed_gzip`` dependency is present, a single file handle is created and persisted. If ``indexed_gzip`` is not available, behaviour is the same as if - ``keep_file_open is False``. The default value is set to the value - of ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT``. + ``keep_file_open is False``. The default value (``None``) will + result in the value of + ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT`` being used. Returns ------- diff --git a/nibabel/spm99analyze.py b/nibabel/spm99analyze.py index 2d9a43d8b9..40ae4f44b8 100644 --- a/nibabel/spm99analyze.py +++ b/nibabel/spm99analyze.py @@ -261,7 +261,7 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open=None): `mmap` value of True gives the same behavior as ``mmap='c'``. If image data file cannot be memory-mapped, ignore `mmap` value and read array from file. - keep_file_open : { 'auto', True, False }, optional, keyword only + keep_file_open : { None, 'auto', True, False }, optional, keyword only `keep_file_open` controls whether a new file handle is created every time the image is accessed, or a single file handle is created and used for the lifetime of this ``ArrayProxy``. If @@ -271,8 +271,9 @@ def from_file_map(klass, file_map, mmap=True, keep_file_open=None): present, a single file handle is created and persisted. If ``indexed_gzip`` is not available, behaviour is the same as if ``keep_file_open is False``. If ``file_map`` refers to an open - file handle, this setting has no effect. The default value is - set to the value of ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT``. + file handle, this setting has no effect. The default value + (``None``) will result in the value of + ``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT`` being used. Returns ------- From 67405b04ce37b5e60393eed6d1898d2494249373 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Sun, 24 Sep 2017 14:35:45 +0100 Subject: [PATCH 32/33] TEST: Small re-arrangements and code clean-up --- nibabel/tests/test_arrayproxy.py | 86 +++++++++++++++----------------- nibabel/tests/test_openers.py | 2 +- 2 files changed, 42 insertions(+), 46 deletions(-) diff --git a/nibabel/tests/test_arrayproxy.py b/nibabel/tests/test_arrayproxy.py index 6a97ac675a..61492f6038 100644 --- a/nibabel/tests/test_arrayproxy.py +++ b/nibabel/tests/test_arrayproxy.py @@ -12,6 +12,7 @@ import warnings import gzip +import contextlib import pickle from io import BytesIO @@ -24,11 +25,12 @@ from ..openers import ImageOpener from ..nifti1 import Nifti1Header +import mock + from numpy.testing import assert_array_equal, assert_array_almost_equal from nose.tools import (assert_true, assert_false, assert_equal, assert_not_equal, assert_raises) from nibabel.testing import VIRAL_MEMMAP -import mock from .test_fileslice import slicer_samples @@ -338,18 +340,18 @@ def check_mmap(hdr, offset, proxy_class, # created class CountingImageOpener(ImageOpener): - numOpeners = 0 + num_openers = 0 def __init__(self, *args, **kwargs): super(CountingImageOpener, self).__init__(*args, **kwargs) - CountingImageOpener.numOpeners += 1 + CountingImageOpener.num_openers += 1 def test_keep_file_open_true_false_invalid(): # Test the behaviour of the keep_file_open __init__ flag, when it is set to # True or False. - CountingImageOpener.numOpeners = 0 + CountingImageOpener.num_openers = 0 fname = 'testdata' dtype = np.float32 data = np.arange(1000, dtype=dtype).reshape((10, 10, 10)) @@ -367,15 +369,15 @@ def test_keep_file_open_true_false_invalid(): for i in range(voxels.shape[0]): x , y, z = [int(c) for c in voxels[i, :]] assert proxy_no_kfp[x, y, z] == x * 100 + y * 10 + z - assert CountingImageOpener.numOpeners == i + 1 - CountingImageOpener.numOpeners = 0 + assert CountingImageOpener.num_openers == i + 1 + CountingImageOpener.num_openers = 0 proxy_kfp = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open=True) assert proxy_kfp._keep_file_open for i in range(voxels.shape[0]): x , y, z = [int(c) for c in voxels[i, :]] assert proxy_kfp[x, y, z] == x * 100 + y * 10 + z - assert CountingImageOpener.numOpeners == 1 + assert CountingImageOpener.num_openers == 1 # Test that the keep_file_open flag has no effect if an open file # handle is passed in with open(fname, 'rb') as fobj: @@ -410,6 +412,28 @@ def test_keep_file_open_true_false_invalid(): ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='cauto') +@contextlib.contextmanager +def patch_indexed_gzip(state): + # Make it look like we do (state==True) or do not (state==False) have + # the indexed gzip module. + if state: + values = (True, True, gzip.GzipFile) + else: + values = (False, False, None) + with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', values[0]), \ + mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', values[1]), \ + mock.patch('nibabel.openers.SafeIndexedGzipFile', values[2], + create=True): + yield + + +@contextlib.contextmanager +def patch_keep_file_open_default(value): + # Patch arrayproxy.KEEP_FILE_OPEN_DEFAULT with the given value + with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', value): + yield + + def test_keep_file_open_auto(): # Test the behaviour of the keep_file_open __init__ flag, when it is set to # 'auto' @@ -420,18 +444,12 @@ def test_keep_file_open_auto(): with gzip.open(fname, 'wb') as fobj: fobj.write(data.tostring(order='F')) # If have_indexed_gzip, then keep_file_open should be True - with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', gzip.GzipFile, - create=True): + with patch_indexed_gzip(True): proxy = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='auto') assert proxy._keep_file_open # If no have_indexed_gzip, then keep_file_open should be False - with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', None, - create=True): + with patch_indexed_gzip(False): proxy = ArrayProxy(fname, ((10, 10, 10), dtype), keep_file_open='auto') assert not proxy._keep_file_open @@ -450,56 +468,34 @@ def test_keep_file_open_default(): # keep_file_open to be False, regardless of whether or not indexed_gzip # is present assert KEEP_FILE_OPEN_DEFAULT is False - with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', None, - create=True): + with patch_indexed_gzip(False): proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) assert not proxy._keep_file_open - with mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', gzip.GzipFile, - create=True): + with patch_indexed_gzip(True): proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) assert not proxy._keep_file_open # KEEP_FILE_OPEN_DEFAULT=True should cause keep_file_open to be True, # regardless of whether or not indexed_gzip is present - with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', True), \ - mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', gzip.GzipFile, - create=True): + with patch_keep_file_open_default(True), patch_indexed_gzip(True): proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) assert proxy._keep_file_open - with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', True), \ - mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', None, - create=True): + with patch_keep_file_open_default(True), patch_indexed_gzip(False): proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) assert proxy._keep_file_open # KEEP_FILE_OPEN_DEFAULT=auto should cause keep_file_open to be True # or False, depending on whether indeed_gzip is present, - with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', 'auto'), \ - mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', True), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', gzip.GzipFile, - create=True): + with patch_keep_file_open_default('auto'), patch_indexed_gzip(True): proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) assert proxy._keep_file_open - with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', 'auto'), \ - mock.patch('nibabel.openers.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.arrayproxy.HAVE_INDEXED_GZIP', False), \ - mock.patch('nibabel.openers.SafeIndexedGzipFile', None, - create=True): + with patch_keep_file_open_default('auto'), patch_indexed_gzip(False): proxy = ArrayProxy(fname, ((10, 10, 10), dtype)) assert not proxy._keep_file_open # KEEP_FILE_OPEN_DEFAULT=any other value should cuse an error to be # raised - with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', 'badval'): + with patch_keep_file_open_default('badvalue'): assert_raises(ValueError, ArrayProxy, fname, ((10, 10, 10), dtype)) - with mock.patch('nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT', None): + with patch_keep_file_open_default(None): assert_raises(ValueError, ArrayProxy, fname, ((10, 10, 10), dtype)) diff --git a/nibabel/tests/test_openers.py b/nibabel/tests/test_openers.py index e48d06dbf0..92c74f42e9 100644 --- a/nibabel/tests/test_openers.py +++ b/nibabel/tests/test_openers.py @@ -17,10 +17,10 @@ from ..tmpdirs import InTemporaryDirectory from ..volumeutils import BinOpener +import mock from nose.tools import (assert_true, assert_false, assert_equal, assert_not_equal, assert_raises) from ..testing import error_warnings -import mock class Lunk(object): From 790c037c7ee52e399ff533d4887b708084edad31 Mon Sep 17 00:00:00 2001 From: Paul McCarthy Date: Sun, 24 Sep 2017 15:58:30 +0100 Subject: [PATCH 33/33] RF: openers._gzip_open does not bother checking whether it is given open file handles because, when called by Opener.__init__, it will only ever get passed file names. --- nibabel/openers.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/nibabel/openers.py b/nibabel/openers.py index 8767eef5c1..04382f66bc 100644 --- a/nibabel/openers.py +++ b/nibabel/openers.py @@ -67,26 +67,16 @@ def readinto(self, buf): return n_read -def _gzip_open(fileish, mode='rb', compresslevel=9): - - # is this a file? if not we assume it is a string - is_file = hasattr(fileish, 'read') and hasattr(fileish, 'write') - - # If we've been given a file object, we can't change its mode. - if is_file and hasattr(fileish, 'mode'): - mode = fileish.mode +def _gzip_open(filename, mode='rb', compresslevel=9): # use indexed_gzip if possible for faster read access if mode == 'rb' and HAVE_INDEXED_GZIP: - if is_file: - gzip_file = SafeIndexedGzipFile(fid=fileish) - else: - gzip_file = SafeIndexedGzipFile(filename=fileish) + gzip_file = SafeIndexedGzipFile(filename) # Fall-back to built-in GzipFile (wrapped with the BufferedGzipFile class # defined above) else: - gzip_file = BufferedGzipFile(fileish, mode, compresslevel) + gzip_file = BufferedGzipFile(filename, mode, compresslevel) # Speedup for #209, for versions of python < 3.5. Open gzip files with # faster reads on large files using a larger read buffer. See