Skip to content

Commit f78ca6f

Browse files
macdice and nbyavuz committed
Introduce file_copy_method setting.
It can be set to either COPY (the default) or CLONE if the system supports it. CLONE causes callers of copydir(), currently CREATE DATABASE ... STRATEGY=FILE_COPY and ALTER DATABASE ... SET TABLESPACE = ..., to use copy_file_range (Linux, FreeBSD) or copyfile (macOS) to copy files instead of a read-write loop over the contents. CLONE gives the kernel the opportunity to share block ranges on copy-on-write file systems and push copying down to storage on others, depending on configuration. On some systems CLONE can be used to clone large databases quickly with CREATE DATABASE ... TEMPLATE=source STRATEGY=FILE_COPY. Other operating systems could be supported; patches welcome. Co-authored-by: Nazir Bilal Yavuz <byavuz81@gmail.com> Reviewed-by: Robert Haas <robertmhaas@gmail.com> Reviewed-by: Ranier Vilela <ranier.vf@gmail.com> Discussion: https://postgr.es/m/CA%2BhUKGLM%2Bt%2BSwBU-cHeMUXJCOgBxSHLGZutV5zCwY4qrCcE02w%40mail.gmail.com
1 parent 042a662 commit f78ca6f

File tree

9 files changed

+159
-3
lines changed

9 files changed

+159
-3
lines changed

doc/src/sgml/config.sgml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2347,6 +2347,44 @@ include_dir 'conf.d'
23472347
</listitem>
23482348
</varlistentry>
23492349

2350+
<varlistentry id="guc_file_copy_method" xreflabel="file_copy_method">
2351+
<term><varname>file_copy_method</varname> (<type>enum</type>)
2352+
<indexterm>
2353+
<primary><varname>file_copy_method</varname> configuration parameter</primary>
2354+
</indexterm>
2355+
</term>
2356+
<listitem>
2357+
<para>
2358+
Specifies the method used to copy files.
2359+
Possible values are <literal>COPY</literal> (default) and
2360+
<literal>CLONE</literal> (if operating system support is available).
2361+
</para>
2362+
2363+
<para>
2364+
This parameter affects:
2365+
</para>
2366+
<itemizedlist>
2367+
<listitem>
2368+
<para>
2369+
<literal><command>CREATE DATABASE ... STRATEGY=FILE_COPY</command></literal>
2370+
</para>
2371+
</listitem>
2372+
<listitem>
2373+
<para>
2374+
<command>ALTER DATABASE ... SET TABLESPACE ...</command>
2375+
</para>
2376+
</listitem>
2377+
</itemizedlist>
2378+
2379+
<para>
2380+
<literal>CLONE</literal> uses the <function>copy_file_range()</function>
2381+
(Linux, FreeBSD) or <function>copyfile()</function>
2382+
(macOS) system calls, giving the kernel the opportunity to share disk
2383+
blocks or push work down to lower layers on some file systems.
2384+
</para>
2385+
</listitem>
2386+
</varlistentry>
2387+
23502388
<varlistentry id="guc-max-notify-queue-pages" xreflabel="max_notify_queue_pages">
23512389
<term><varname>max_notify_queue_pages</varname> (<type>integer</type>)
23522390
<indexterm>

doc/src/sgml/ref/alter_database.sgml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@ ALTER DATABASE <replaceable class="parameter">name</replaceable> RESET ALL
8282
default tablespace to the new tablespace. The new default tablespace
8383
must be empty for this database, and no one can be connected to
8484
the database. Tables and indexes in non-default tablespaces are
85-
unaffected.
85+
unaffected. The method used to copy files to the new tablespace
86+
is affected by the <xref linkend="guc_file_copy_method"/> setting.
8687
</para>
8788

8889
<para>

doc/src/sgml/ref/create_database.sgml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,9 @@ CREATE DATABASE <replaceable class="parameter">name</replaceable>
138138
log volume substantially, especially if the template database is large,
139139
it also forces the system to perform a checkpoint both before and
140140
after the creation of the new database. In some situations, this may
141-
have a noticeable negative impact on overall system performance.
141+
have a noticeable negative impact on overall system performance. The
142+
<literal>FILE_COPY</literal> strategy is affected by the <xref
143+
linkend="guc_file_copy_method"/> setting.
142144
</para>
143145
</listitem>
144146
</varlistentry>

src/backend/storage/file/copydir.c

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818

1919
#include "postgres.h"
2020

21+
#ifdef HAVE_COPYFILE_H
22+
#include <copyfile.h>
23+
#endif
2124
#include <fcntl.h>
2225
#include <unistd.h>
2326

@@ -27,11 +30,19 @@
2730
#include "storage/copydir.h"
2831
#include "storage/fd.h"
2932

33+
/* GUCs */
34+
int file_copy_method = FILE_COPY_METHOD_COPY;
35+
36+
static void clone_file(const char *fromfile, const char *tofile);
37+
3038
/*
3139
* copydir: copy a directory
3240
*
3341
* If recurse is false, subdirectories are ignored. Anything that's not
3442
* a directory or a regular file is ignored.
43+
*
44+
* This function uses the file_copy_method GUC. New uses of this function must
45+
* be documented in doc/src/sgml/config.sgml.
3546
*/
3647
void
3748
copydir(const char *fromdir, const char *todir, bool recurse)
@@ -71,7 +82,12 @@ copydir(const char *fromdir, const char *todir, bool recurse)
7182
copydir(fromfile, tofile, true);
7283
}
7384
else if (xlde_type == PGFILETYPE_REG)
74-
copy_file(fromfile, tofile);
85+
{
86+
if (file_copy_method == FILE_COPY_METHOD_CLONE)
87+
clone_file(fromfile, tofile);
88+
else
89+
copy_file(fromfile, tofile);
90+
}
7591
}
7692
FreeDir(xldir);
7793

@@ -214,3 +230,65 @@ copy_file(const char *fromfile, const char *tofile)
214230

215231
pfree(buffer);
216232
}
233+
234+
/*
235+
* clone one file
236+
*/
237+
static void
238+
clone_file(const char *fromfile, const char *tofile)
239+
{
240+
#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
241+
if (copyfile(fromfile, tofile, NULL, COPYFILE_CLONE_FORCE) < 0)
242+
ereport(ERROR,
243+
(errcode_for_file_access(),
244+
errmsg("could not clone file \"%s\" to \"%s\": %m",
245+
fromfile, tofile)));
246+
#elif defined(HAVE_COPY_FILE_RANGE)
247+
int srcfd;
248+
int dstfd;
249+
ssize_t nbytes;
250+
251+
srcfd = OpenTransientFile(fromfile, O_RDONLY | PG_BINARY);
252+
if (srcfd < 0)
253+
ereport(ERROR,
254+
(errcode_for_file_access(),
255+
errmsg("could not open file \"%s\": %m", fromfile)));
256+
257+
dstfd = OpenTransientFile(tofile, O_WRONLY | O_CREAT | O_EXCL | PG_BINARY);
258+
if (dstfd < 0)
259+
ereport(ERROR,
260+
(errcode_for_file_access(),
261+
errmsg("could not create file \"%s\": %m", tofile)));
262+
263+
do
264+
{
265+
/*
266+
* Don't copy too much at once, so we can check for interrupts from
267+
* time to time if it falls back to a slow copy.
268+
*/
269+
CHECK_FOR_INTERRUPTS();
270+
pgstat_report_wait_start(WAIT_EVENT_COPY_FILE_COPY);
271+
nbytes = copy_file_range(srcfd, NULL, dstfd, NULL, 1024 * 1024, 0);
272+
if (nbytes < 0 && errno != EINTR)
273+
ereport(ERROR,
274+
(errcode_for_file_access(),
275+
errmsg("could not clone file \"%s\" to \"%s\": %m",
276+
fromfile, tofile)));
277+
pgstat_report_wait_end();
278+
}
279+
while (nbytes != 0);
280+
281+
if (CloseTransientFile(dstfd) != 0)
282+
ereport(ERROR,
283+
(errcode_for_file_access(),
284+
errmsg("could not close file \"%s\": %m", tofile)));
285+
286+
if (CloseTransientFile(srcfd) != 0)
287+
ereport(ERROR,
288+
(errcode_for_file_access(),
289+
errmsg("could not close file \"%s\": %m", fromfile)));
290+
#else
291+
/* If there is no CLONE support this function should not be called. */
292+
pg_unreachable();
293+
#endif
294+
}

src/backend/utils/activity/wait_event_names.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ CONTROL_FILE_SYNC "Waiting for the <filename>pg_control</filename> file to reach
208208
CONTROL_FILE_SYNC_UPDATE "Waiting for an update to the <filename>pg_control</filename> file to reach durable storage."
209209
CONTROL_FILE_WRITE "Waiting for a write to the <filename>pg_control</filename> file."
210210
CONTROL_FILE_WRITE_UPDATE "Waiting for a write to update the <filename>pg_control</filename> file."
211+
COPY_FILE_COPY "Waiting for a file copy operation."
211212
COPY_FILE_READ "Waiting for a read during a file copy operation."
212213
COPY_FILE_WRITE "Waiting for a write during a file copy operation."
213214
DATA_FILE_EXTEND "Waiting for a relation data file to be extended."

src/backend/utils/misc/guc_tables.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
*/
2121
#include "postgres.h"
2222

23+
#ifdef HAVE_COPYFILE_H
24+
#include <copyfile.h>
25+
#endif
2326
#include <float.h>
2427
#include <limits.h>
2528
#ifdef HAVE_SYSLOG
@@ -76,6 +79,7 @@
7679
#include "storage/aio.h"
7780
#include "storage/bufmgr.h"
7881
#include "storage/bufpage.h"
82+
#include "storage/copydir.h"
7983
#include "storage/io_worker.h"
8084
#include "storage/large_object.h"
8185
#include "storage/pg_shmem.h"
@@ -479,6 +483,14 @@ static const struct config_enum_entry wal_compression_options[] = {
479483
{NULL, 0, false}
480484
};
481485

486+
static const struct config_enum_entry file_copy_method_options[] = {
487+
{"copy", FILE_COPY_METHOD_COPY, false},
488+
#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE) || defined(HAVE_COPY_FILE_RANGE)
489+
{"clone", FILE_COPY_METHOD_CLONE, false},
490+
#endif
491+
{NULL, 0, false}
492+
};
493+
482494
/*
483495
* Options for enum values stored in other modules
484496
*/
@@ -5242,6 +5254,16 @@ struct config_enum ConfigureNamesEnum[] =
52425254
NULL, NULL, NULL
52435255
},
52445256

5257+
{
5258+
{"file_copy_method", PGC_USERSET, RESOURCES_DISK,
5259+
gettext_noop("Selects the file copy method."),
5260+
NULL
5261+
},
5262+
&file_copy_method,
5263+
FILE_COPY_METHOD_COPY, file_copy_method_options,
5264+
NULL, NULL, NULL
5265+
},
5266+
52455267
{
52465268
{"wal_sync_method", PGC_SIGHUP, WAL_SETTINGS,
52475269
gettext_noop("Selects the method used for forcing WAL updates to disk."),

src/backend/utils/misc/postgresql.conf.sample

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,10 @@
183183
#max_notify_queue_pages = 1048576 # limits the number of SLRU pages allocated
184184
# for NOTIFY / LISTEN queue
185185

186+
#file_copy_method = copy # the default is the first option
187+
# copy
188+
# clone (if system support is available)
189+
186190
# - Kernel Resources -
187191

188192
#max_files_per_process = 1000 # min 64

src/include/storage/copydir.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@
1313
#ifndef COPYDIR_H
1414
#define COPYDIR_H
1515

16+
typedef enum FileCopyMethod
17+
{
18+
FILE_COPY_METHOD_COPY,
19+
FILE_COPY_METHOD_CLONE,
20+
} FileCopyMethod;
21+
22+
/* GUC parameters */
23+
extern PGDLLIMPORT int file_copy_method;
24+
1625
extern void copydir(const char *fromdir, const char *todir, bool recurse);
1726
extern void copy_file(const char *fromfile, const char *tofile);
1827

src/tools/pgindent/typedefs.list

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,7 @@ FieldSelect
804804
FieldStore
805805
File
806806
FileBackupMethod
807+
FileCopyMethod
807808
FileFdwExecutionState
808809
FileFdwPlanState
809810
FileNameMap

0 commit comments

Comments
 (0)