Skip to content

REF/FIX: Correct link/copy behavior #1391

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 21, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ before_install:
echo 'include_dirs = /usr/include:/usr/include/X11' >> $HOME/.numpy-site.cfg;
fi
install:
- sudo apt-get install fusefat
- conda update --yes conda
- conda create -n testenv --yes pip python=$TRAVIS_PYTHON_VERSION
- source activate testenv
Expand Down
1 change: 1 addition & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Next release
* ENH: New interfaces for interacting with AWS S3: S3DataSink and S3DataGrabber (https://github.com/nipy/nipype/pull/1201)
* ENH: Interfaces for MINC tools (https://github.com/nipy/nipype/pull/1304)
* FIX: Use realpath to determine hard link source (https://github.com/nipy/nipype/pull/1388)
* FIX: Correct linking/copying fallback behavior (https://github.com/nipy/nipype/pull/1391)

Release 0.11.0 (September 15, 2015)
============
Expand Down
2 changes: 1 addition & 1 deletion circle.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ dependencies:
- bash <(wget -q -O- http://neuro.debian.net/_files/neurodebian-travis.sh)
override:
# Install apt packages
- sudo apt-get install -y fsl-core fsl-atlases fsl-mni152-templates fsl-feeds afni swig python-vtk xvfb
- sudo apt-get install -y fsl-core fsl-atlases fsl-mni152-templates fsl-feeds afni swig python-vtk xvfb fusefat
- echo 'source /etc/fsl/fsl.sh' >> $HOME/.profile
- echo 'source /etc/afni/afni.sh' >> $HOME/.profile
- mkdir -p ~/examples/ && ln -sf /usr/share/fsl-feeds/ ~/examples/feeds
Expand Down
2 changes: 1 addition & 1 deletion nipype/testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from numpy.testing import *

from . import decorators as dec
from .utils import skip_if_no_package, package_check
from .utils import skip_if_no_package, package_check, TempFATFS

skipif = dec.skipif

Expand Down
19 changes: 19 additions & 0 deletions nipype/testing/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
"""Test testing utilities
"""

import os
import warnings
from nipype.testing.utils import TempFATFS
from nose.tools import assert_true


def test_tempfatfs():
    """Check that the directory handed out by ``TempFATFS`` exists.

    If the filesystem cannot be set up (``TempFATFS`` raises ``IOError``),
    a warning is emitted and no checks are generated.
    """
    try:
        fatfs = TempFATFS()
    except IOError:
        warnings.warn("Cannot mount FAT filesystems with FUSE")
        return

    with fatfs as tmpdir:
        yield assert_true, os.path.exists(tmpdir)
69 changes: 69 additions & 0 deletions nipype/testing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
"""
__docformat__ = 'restructuredtext'

import os
import time
import shutil
import signal
import subprocess
from tempfile import mkdtemp
from ..utils.misc import package_check
from nose import SkipTest

Expand All @@ -19,3 +25,66 @@ def skip_if_no_package(*args, **kwargs):
package_check(exc_failed_import=SkipTest,
exc_failed_check=SkipTest,
*args, **kwargs)


class TempFATFS(object):
    """Context manager exposing a temporary FAT filesystem mounted with
    ``fusefat``.

    Entering the context returns the path of the mount point; leaving it
    stops ``fusefat`` and removes the temporary directory.
    """

    def __init__(self, size_in_mbytes=8, delay=0.5):
        """Temporary filesystem for testing non-POSIX filesystems on a POSIX
        system.

        with TempFATFS() as fatdir:
            target = os.path.join(fatdir, 'target')
            copyfile(file1, target, copy=False)
            assert_false(os.path.islink(target))

        Arguments
        ---------
        size_in_mbytes : int
            Size (in MiB) of filesystem to create
        delay : float
            Time (in seconds) to wait for fusefat to start, stop

        Raises
        ------
        IOError
            If ``mkfs.vfat`` fails or cannot be run, if ``fusefat`` cannot
            be started, or if it exits within ``delay`` seconds.  Any
            temporary resources created so far are released first.
        """
        self.delay = delay
        self.fusefat = None
        self.tmpdir = mkdtemp()
        self.dev_null = open(os.devnull, 'wb')

        vfatfile = os.path.join(self.tmpdir, 'vfatblock')
        self.vfatmount = os.path.join(self.tmpdir, 'vfatmount')
        # File created inside the mount; it disappears from view once the
        # filesystem is unmounted, which __exit__ uses as its unmount check.
        self.canary = os.path.join(self.vfatmount, '.canary')

        mkfs_args = ['mkfs.vfat', vfatfile]
        mount_args = ['fusefat', '-o', 'rw+', '-f', vfatfile, self.vfatmount]

        try:
            # Zero-filled backing file that mkfs.vfat formats in place
            with open(vfatfile, 'wb') as fobj:
                fobj.write(b'\x00' * (int(size_in_mbytes) << 20))
            os.mkdir(self.vfatmount)

            try:
                subprocess.check_call(args=mkfs_args, stdout=self.dev_null,
                                      stderr=self.dev_null)
            except (OSError, subprocess.CalledProcessError):
                # Missing binary or non-zero exit status
                raise IOError("mkfs.vfat failed")

            try:
                self.fusefat = subprocess.Popen(args=mount_args,
                                                stdout=self.dev_null,
                                                stderr=self.dev_null)
            except OSError:
                raise IOError("fusefat could not be started")
            time.sleep(self.delay)

            # '-f' keeps fusefat in the foreground, so an exit code at this
            # point means the mount did not survive.
            if self.fusefat.poll() is not None:
                raise IOError("fusefat terminated too soon")

            open(self.canary, 'wb').close()
        except Exception:
            # Do not leak the temp directory, the /dev/null handle or a
            # half-started fusefat process when setup fails.
            self._abort_setup()
            raise

    def _abort_setup(self):
        """Release everything acquired by a failed ``__init__``."""
        if self.fusefat is not None and self.fusefat.poll() is None:
            self.fusefat.terminate()
            self.fusefat.wait()
        self.dev_null.close()
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def __enter__(self):
        """Return the path of the mounted FAT filesystem."""
        return self.vfatmount

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop fusefat and delete the temporary directory.

        Raises ``AssertionError`` (leaving the directory in place) if the
        canary file is still visible, i.e. the filesystem appears to be
        mounted still.
        """
        if self.fusefat is not None:
            self.fusefat.send_signal(signal.SIGINT)

            # Allow 1s to return without sending terminate
            for _ in range(10):
                time.sleep(0.1)
                if self.fusefat.poll() is not None:
                    break
            else:
                self.fusefat.terminate()
                time.sleep(self.delay)
        try:
            # Explicit raise instead of ``assert`` so the guard in front of
            # rmtree also holds when Python runs with -O.
            if os.path.exists(self.canary):
                raise AssertionError("FAT filesystem still mounted at %s" %
                                     self.vfatmount)
        finally:
            self.dev_null.close()
        shutil.rmtree(self.tmpdir)
150 changes: 79 additions & 71 deletions nipype/utils/filemanip.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
import os
import re
import shutil
import posixpath

import numpy as np

from .misc import is_container
from .config import mkdir_p
from ..external.six import string_types
from ..interfaces.traits_extension import isdefined

Expand All @@ -31,31 +31,6 @@ class FileNotFoundError(Exception):
pass


def nipype_hardlink_wrapper(raw_src, raw_dst):
    """Attempt to use hard link instead of file copy.

    The intent is to avoid unnecessary duplication
    of large files when using a DataSink.
    Hard links are not supported on all file systems
    or os environments, and will not succeed if the
    src and dst are not on the same physical hardware
    partition.
    If the hardlink fails, then fall back to using
    a standard copy.

    Parameters
    ----------
    raw_src : str
        Path of the existing file; symlinks are resolved before linking.
    raw_dst : str
        Path to create.  Anything already at this path (including a
        dangling symlink) is removed first, unless it is the source itself.
    """
    # Use realpath to avoid hardlinking symlinks
    src = os.path.realpath(raw_src)
    # Use normpath, in case destination is a symlink
    dst = os.path.normpath(raw_dst)
    # lexists (not exists) so that a dangling symlink at dst is removed too,
    # instead of being written through by the fallback copy.
    if src != dst and os.path.lexists(dst):
        os.unlink(dst)  # First remove destination
    try:
        os.link(src, dst)  # Reference same inode to avoid duplication
    except (OSError, AttributeError):
        # OSError: link refused (unsupported filesystem, cross-device, ...)
        # AttributeError: os.link is not available on this platform
        shutil.copyfile(src, dst)  # Fall back to traditional copy


def split_filename(fname):
"""Split a filename into parts: path, base filename and extension.

Expand Down Expand Up @@ -201,7 +176,13 @@ def hash_timestamp(afile):

def copyfile(originalfile, newfile, copy=False, create_new=False,
hashmethod=None, use_hardlink=False):
"""Copy or symlink ``originalfile`` to ``newfile``.
"""Copy or link ``originalfile`` to ``newfile``.

If ``use_hardlink`` is True, and the file can be hard-linked, then a
link is created, instead of copying the file.

If a hard link is not created and ``copy`` is False, then a symbolic
link is created.

Parameters
----------
Expand All @@ -212,6 +193,9 @@ def copyfile(originalfile, newfile, copy=False, create_new=False,
copy : Bool
specifies whether to copy or symlink files
(default=False) but only for POSIX systems
use_hardlink : Bool
specifies whether to hard-link files, when able
(Default=False), taking precedence over copy

Returns
-------
Expand All @@ -237,63 +221,87 @@ def copyfile(originalfile, newfile, copy=False, create_new=False,
if hashmethod is None:
hashmethod = config.get('execution', 'hash_method').lower()

elif os.path.exists(newfile):
if hashmethod == 'timestamp':
newhash = hash_timestamp(newfile)
elif hashmethod == 'content':
newhash = hash_infile(newfile)
fmlogger.debug("File: %s already exists,%s, copy:%d"
% (newfile, newhash, copy))
# the following seems unnecessary
# if os.name is 'posix' and copy:
# if os.path.lexists(newfile) and os.path.islink(newfile):
# os.unlink(newfile)
# newhash = None
if os.name is 'posix' and not copy:
if os.path.lexists(newfile):
if hashmethod == 'timestamp':
orighash = hash_timestamp(originalfile)
elif hashmethod == 'content':
orighash = hash_infile(originalfile)
fmlogger.debug('Original hash: %s, %s' % (originalfile, orighash))
if newhash != orighash:
os.unlink(newfile)
if (newhash is None) or (newhash != orighash):
os.symlink(originalfile, newfile)
else:
if newhash:
# Existing file
# -------------
# Options:
# symlink
# to originalfile (keep if not (use_hardlink or copy))
# to other file (unlink)
# regular file
# hard link to originalfile (keep)
# copy of file (same hash) (keep)
# different file (diff hash) (unlink)
keep = False
if os.path.lexists(newfile):
if os.path.islink(newfile):
if all(os.path.readlink(newfile) == originalfile, not use_hardlink,
not copy):
keep = True
elif posixpath.samefile(newfile, originalfile):
keep = True
else:
if hashmethod == 'timestamp':
orighash = hash_timestamp(originalfile)
hashfn = hash_timestamp
elif hashmethod == 'content':
orighash = hash_infile(originalfile)
if (newhash is None) or (newhash != orighash):
try:
fmlogger.debug("Copying File: %s->%s" %
(newfile, originalfile))
if use_hardlink:
nipype_hardlink_wrapper(originalfile, newfile)
else:
shutil.copyfile(originalfile, newfile)
except shutil.Error as e:
fmlogger.warn(e.message)
else:
hashfn = hash_infile
newhash = hashfn(newfile)
fmlogger.debug("File: %s already exists,%s, copy:%d" %
(newfile, newhash, copy))
orighash = hashfn(originalfile)
keep = newhash == orighash
if keep:
fmlogger.debug("File: %s already exists, not overwriting, copy:%d"
% (newfile, copy))
else:
os.unlink(newfile)

# New file
# --------
# use_hardlink & can_hardlink => hardlink
# ~hardlink & ~copy & can_symlink => symlink
# ~hardlink & ~symlink => copy
if not keep and use_hardlink:
try:
fmlogger.debug("Linking File: %s->%s" % (newfile, originalfile))
# Use realpath to avoid hardlinking symlinks
os.link(os.path.realpath(originalfile), newfile)
except OSError:
use_hardlink = False # Disable hardlink for associated files
else:
keep = True

if not keep and not copy and os.name == 'posix':
try:
fmlogger.debug("Symlinking File: %s->%s" % (newfile, originalfile))
os.symlink(originalfile, newfile)
except OSError:
copy = True # Disable symlink for associated files
else:
keep = True

if not keep:
try:
fmlogger.debug("Copying File: %s->%s" % (newfile, originalfile))
shutil.copyfile(originalfile, newfile)
except shutil.Error as e:
fmlogger.warn(e.message)

# Associated files
if originalfile.endswith(".img"):
hdrofile = originalfile[:-4] + ".hdr"
hdrnfile = newfile[:-4] + ".hdr"
matofile = originalfile[:-4] + ".mat"
if os.path.exists(matofile):
matnfile = newfile[:-4] + ".mat"
copyfile(matofile, matnfile, copy, create_new, hashmethod,
use_hardlink)
copyfile(hdrofile, hdrnfile, copy, create_new, hashmethod,
use_hardlink)
copyfile(matofile, matnfile, copy, hashmethod=hashmethod,
use_hardlink=use_hardlink)
copyfile(hdrofile, hdrnfile, copy, hashmethod=hashmethod,
use_hardlink=use_hardlink)
elif originalfile.endswith(".BRIK"):
hdrofile = originalfile[:-5] + ".HEAD"
hdrnfile = newfile[:-5] + ".HEAD"
copyfile(hdrofile, hdrnfile, copy, create_new, hashmethod,
use_hardlink)
copyfile(hdrofile, hdrnfile, copy, hashmethod=hashmethod,
use_hardlink=use_hardlink)

return newfile

Expand Down
34 changes: 33 additions & 1 deletion nipype/utils/tests/test_filemanip.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

import os
from tempfile import mkstemp, mkdtemp
import warnings

from nipype.testing import assert_equal, assert_true, assert_false
from nipype.testing import assert_equal, assert_true, assert_false, TempFATFS
from nipype.utils.filemanip import (save_json, load_json,
fname_presuffix, fnames_presuffix,
hash_rename, check_forhash,
Expand Down Expand Up @@ -167,6 +168,37 @@ def test_linkchain():
os.unlink(orig_hdr)


def test_copyfallback():
    """Check that ``copyfile`` produces regular, independent files on a
    FAT filesystem for every combination of ``copy`` and ``use_hardlink``.

    Does nothing on non-POSIX systems; emits a warning and skips the checks
    when the temporary FAT filesystem cannot be set up.
    """
    # Compare by value: identity of string literals is an implementation
    # detail and ``is not`` on a literal is a SyntaxWarning on Python 3.8+.
    if os.name != 'posix':
        return
    orig_img, orig_hdr = _temp_analyze_files()
    imgname = os.path.basename(orig_img)
    hdrname = os.path.basename(orig_hdr)
    try:
        fatfs = TempFATFS()
    except IOError:
        warnings.warn('Fuse mount failed. copyfile fallback tests skipped.')
    else:
        with fatfs as fatdir:
            tgt_img = os.path.join(fatdir, imgname)
            tgt_hdr = os.path.join(fatdir, hdrname)
            for copy in (True, False):
                for use_hardlink in (True, False):
                    copyfile(orig_img, tgt_img, copy=copy,
                             use_hardlink=use_hardlink)
                    # Both the image and its associated header must exist
                    # as real files that are distinct from the originals.
                    yield assert_true, os.path.exists(tgt_img)
                    yield assert_true, os.path.exists(tgt_hdr)
                    yield assert_false, os.path.islink(tgt_img)
                    yield assert_false, os.path.islink(tgt_hdr)
                    yield assert_false, os.path.samefile(orig_img, tgt_img)
                    yield assert_false, os.path.samefile(orig_hdr, tgt_hdr)
                    os.unlink(tgt_img)
                    os.unlink(tgt_hdr)
    finally:
        os.unlink(orig_img)
        os.unlink(orig_hdr)


def test_filename_to_list():
x = filename_to_list('foo.nii')
yield assert_equal, x, ['foo.nii']
Expand Down