Skip to content

Commit 3145221

Browse files
committed
RF: Simplify copyfiles, updating logic for links
1 parent 192ea22 commit 3145221

File tree

1 file changed

+79
-75
lines changed

1 file changed

+79
-75
lines changed

nipype/utils/filemanip.py

Lines changed: 79 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,11 @@
1515
import os
1616
import re
1717
import shutil
18+
import posixpath
1819

1920
import numpy as np
2021

2122
from .misc import is_container
22-
from .config import mkdir_p
2323
from ..external.six import string_types
2424
from ..interfaces.traits_extension import isdefined
2525

@@ -31,31 +31,6 @@ class FileNotFoundError(Exception):
3131
pass
3232

3333

34-
def nipype_hardlink_wrapper(raw_src, raw_dst):
35-
"""Attempt to use hard link instead of file copy.
36-
The intent is to avoid unnecessary duplication
37-
of large files when using a DataSink.
38-
Hard links are not supported on all file systems
39-
or os environments, and will not succeed if the
40-
src and dst are not on the same physical hardware
41-
partition.
42-
If the hardlink fails, then fall back to using
43-
a standard copy.
44-
"""
45-
# Use realpath to avoid hardlinking symlinks
46-
src = os.path.realpath(raw_src)
47-
# Use normpath, in case destination is a symlink
48-
dst = os.path.normpath(raw_dst)
49-
del raw_src
50-
del raw_dst
51-
if src != dst and os.path.exists(dst):
52-
os.unlink(dst) # First remove destination
53-
try:
54-
os.link(src, dst) # Reference same inode to avoid duplication
55-
except:
56-
shutil.copyfile(src, dst) # Fall back to traditional copy
57-
58-
5934
def split_filename(fname):
6035
"""Split a filename into parts: path, base filename and extension.
6136
@@ -201,7 +176,13 @@ def hash_timestamp(afile):
201176

202177
def copyfile(originalfile, newfile, copy=False, create_new=False,
203178
hashmethod=None, use_hardlink=False):
204-
"""Copy or symlink ``originalfile`` to ``newfile``.
179+
"""Copy or link ``originalfile`` to ``newfile``.
180+
181+
If ``use_hardlink`` is True, and the file can be hard-linked, then a
182+
link is created, instead of copying the file.
183+
184+
If a hard link is not created and ``copy`` is False, then a symbolic
185+
link is created.
205186
206187
Parameters
207188
----------
@@ -212,6 +193,9 @@ def copyfile(originalfile, newfile, copy=False, create_new=False,
212193
copy : Bool
213194
specifies whether to copy or symlink files
214195
(default=False) but only for POSIX systems
196+
use_hardlink : Bool
197+
specifies whether to hard-link files, when able
198+
(Default=False), taking precedence over copy
215199
216200
Returns
217201
-------
@@ -237,67 +221,87 @@ def copyfile(originalfile, newfile, copy=False, create_new=False,
237221
if hashmethod is None:
238222
hashmethod = config.get('execution', 'hash_method').lower()
239223

240-
elif os.path.exists(newfile):
241-
if hashmethod == 'timestamp':
242-
newhash = hash_timestamp(newfile)
243-
elif hashmethod == 'content':
244-
newhash = hash_infile(newfile)
245-
fmlogger.debug("File: %s already exists,%s, copy:%d"
246-
% (newfile, newhash, copy))
247-
# the following seems unnecessary
248-
# if os.name is 'posix' and copy:
249-
# if os.path.lexists(newfile) and os.path.islink(newfile):
250-
# os.unlink(newfile)
251-
# newhash = None
252-
if os.name is 'posix' and not copy:
253-
if os.path.lexists(newfile):
254-
if hashmethod == 'timestamp':
255-
orighash = hash_timestamp(originalfile)
256-
elif hashmethod == 'content':
257-
orighash = hash_infile(originalfile)
258-
fmlogger.debug('Original hash: %s, %s' % (originalfile, orighash))
259-
if newhash != orighash:
260-
os.unlink(newfile)
261-
if (newhash is None) or (newhash != orighash):
262-
try:
263-
os.symlink(originalfile, newfile)
264-
except OSError:
265-
return copyfile(originalfile, newfile, True, create_new,
266-
hashmethod, use_hardlink)
267-
else:
268-
if newhash:
224+
# Existing file
225+
# -------------
226+
# Options:
227+
# symlink
228+
# to originalfile (keep if not (use_hardlink or copy))
229+
# to other file (unlink)
230+
# regular file
231+
# hard link to originalfile (keep)
232+
# copy of file (same hash) (keep)
233+
# different file (diff hash) (unlink)
234+
keep = False
235+
if os.path.lexists(newfile):
236+
if os.path.islink(newfile):
237+
if all(os.path.readlink(newfile) == originalfile, not use_hardlink,
238+
not copy):
239+
keep = True
240+
elif posixpath.samefile(newfile, originalfile):
241+
keep = True
242+
else:
269243
if hashmethod == 'timestamp':
270-
orighash = hash_timestamp(originalfile)
244+
hashfn = hash_timestamp
271245
elif hashmethod == 'content':
272-
orighash = hash_infile(originalfile)
273-
if (newhash is None) or (newhash != orighash):
274-
try:
275-
fmlogger.debug("Copying File: %s->%s" %
276-
(newfile, originalfile))
277-
if use_hardlink:
278-
nipype_hardlink_wrapper(originalfile, newfile)
279-
else:
280-
shutil.copyfile(originalfile, newfile)
281-
except shutil.Error as e:
282-
fmlogger.warn(e.message)
283-
else:
246+
hashfn = hash_infile
247+
newhash = hashfn(newfile)
248+
fmlogger.debug("File: %s already exists,%s, copy:%d" %
249+
(newfile, newhash, copy))
250+
orighash = hashfn(originalfile)
251+
keep = newhash == orighash
252+
if keep:
284253
fmlogger.debug("File: %s already exists, not overwriting, copy:%d"
285254
% (newfile, copy))
255+
else:
256+
os.unlink(newfile)
257+
258+
# New file
259+
# --------
260+
# use_hardlink & can_hardlink => hardlink
261+
# ~hardlink & ~copy & can_symlink => symlink
262+
# ~hardlink & ~symlink => copy
263+
if not keep and use_hardlink:
264+
try:
265+
fmlogger.debug("Linking File: %s->%s" % (newfile, originalfile))
266+
# Use realpath to avoid hardlinking symlinks
267+
os.link(os.path.realpath(originalfile), newfile)
268+
except OSError:
269+
use_hardlink = False # Disable hardlink for associated files
270+
else:
271+
keep = True
272+
273+
if not keep and not copy and os.name == 'posix':
274+
try:
275+
fmlogger.debug("Symlinking File: %s->%s" % (newfile, originalfile))
276+
os.symlink(originalfile, newfile)
277+
except OSError:
278+
copy = True # Disable symlink for associated files
279+
else:
280+
keep = True
281+
282+
if not keep:
283+
try:
284+
fmlogger.debug("Copying File: %s->%s" % (newfile, originalfile))
285+
shutil.copyfile(originalfile, newfile)
286+
except shutil.Error as e:
287+
fmlogger.warn(e.message)
288+
289+
# Associated files
286290
if originalfile.endswith(".img"):
287291
hdrofile = originalfile[:-4] + ".hdr"
288292
hdrnfile = newfile[:-4] + ".hdr"
289293
matofile = originalfile[:-4] + ".mat"
290294
if os.path.exists(matofile):
291295
matnfile = newfile[:-4] + ".mat"
292-
copyfile(matofile, matnfile, copy, create_new, hashmethod,
293-
use_hardlink)
294-
copyfile(hdrofile, hdrnfile, copy, create_new, hashmethod,
295-
use_hardlink)
296+
copyfile(matofile, matnfile, copy, hashmethod=hashmethod,
297+
use_hardlink=use_hardlink)
298+
copyfile(hdrofile, hdrnfile, copy, hashmethod=hashmethod,
299+
use_hardlink=use_hardlink)
296300
elif originalfile.endswith(".BRIK"):
297301
hdrofile = originalfile[:-5] + ".HEAD"
298302
hdrnfile = newfile[:-5] + ".HEAD"
299-
copyfile(hdrofile, hdrnfile, copy, create_new, hashmethod,
300-
use_hardlink)
303+
copyfile(hdrofile, hdrnfile, copy, hashmethod=hashmethod,
304+
use_hardlink=use_hardlink)
301305

302306
return newfile
303307

0 commit comments

Comments
 (0)