From 7beeded77acc260fadb7b5366fc7fc247d1a8f7f Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sat, 13 Jan 2024 00:28:57 +0100 Subject: [PATCH 1/3] Fix GH-13071: Copying large files using mmap-able source streams may exhaust available memory and fail Commit 5cbe5a538c disabled chunking for all writes to streams. However, user streams have a callback where code is executed on data that is subject to the memory limit. Therefore, when using large writes or stream_copy_to_stream/copy the memory limit can easily be hit with large enough data. To solve this, we reintroduce chunking for userspace streams. Users have control over the chunk size, which is neat because they can improve the performance by setting the chunk size if that turns out to be a bottleneck. In an ideal world, we add an option so we can "ask" the stream whether it "prefers" chunked writes, similar to how we have php_stream_mmap_supported & friends. However, that cannot be done on stable branches. --- ext/standard/tests/file/userstreams_006.phpt | 3 ++- ext/standard/tests/streams/set_file_buffer.phpt | 1 + .../tests/streams/stream_set_chunk_size.phpt | 16 ++++++++++------ main/streams/streams.c | 9 ++++++++- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/ext/standard/tests/file/userstreams_006.phpt b/ext/standard/tests/file/userstreams_006.phpt index a432937dac29f..e5d341379f6d1 100644 --- a/ext/standard/tests/file/userstreams_006.phpt +++ b/ext/standard/tests/file/userstreams_006.phpt @@ -34,5 +34,6 @@ bool(true) option: 3, 2, 50 int(-1) int(8192) -size: 70 +size: 42 +size: 28 int(70) diff --git a/ext/standard/tests/streams/set_file_buffer.phpt b/ext/standard/tests/streams/set_file_buffer.phpt index ef808a24fd002..c39ba56cf6cfb 100644 --- a/ext/standard/tests/streams/set_file_buffer.phpt +++ b/ext/standard/tests/streams/set_file_buffer.phpt @@ -39,4 +39,5 @@ option: %d, %d, %d int(%i) int(%d) size: %d +size: 28 int(%d) diff --git a/ext/standard/tests/streams/stream_set_chunk_size.phpt b/ext/standard/tests/streams/stream_set_chunk_size.phpt index 77d9bac00ea4f..8bb5b46b7f94a 100644 --- a/ext/standard/tests/streams/stream_set_chunk_size.phpt +++ b/ext/standard/tests/streams/stream_set_chunk_size.phpt @@ -35,7 +35,7 @@ echo "should return previous chunk size (8192)\n"; var_dump(stream_set_chunk_size($f, 1)); echo "should be read without buffer (\$count == 10000)\n"; var_dump(strlen(fread($f, 10000))); -echo "should have no effect on writes\n"; +echo "should elicit 3 writes\n"; var_dump(fwrite($f, str_repeat('b', 3))); echo "should return previous chunk size (1)\n"; @@ -46,7 +46,7 @@ echo "should elicit one read of size 100 (chunk size)\n"; var_dump(strlen(fread($f, 50))); echo "should elicit no read because there is sufficient cached data\n"; var_dump(strlen(fread($f, 50))); -echo "should have no effect on writes\n"; +echo "should elicit 3 writes\n"; var_dump(strlen(fwrite($f, str_repeat('b', 250)))); echo "\nerror conditions\n"; @@ -68,8 +68,10 @@ int(8192) should be read without buffer ($count == 10000) read with size: 10000 int(10000) -should have no effect on writes -write with size: 3 +should elicit 3 writes +write with size: 1 +write with size: 1 +write with size: 1 int(3) should return previous chunk size (1) int(1) @@ -81,8 +83,10 @@ read with size: 100 int(50) should elicit no read because there is sufficient cached data int(50) -should have no effect on writes -write with size: 250 +should elicit 3 writes +write with size: 100 +write with size: 100 +write with size: 50 int(3) error conditions diff --git a/main/streams/streams.c b/main/streams/streams.c index e1399927b2783..d45a9bfab85f8 100644 --- a/main/streams/streams.c +++ b/main/streams/streams.c @@ -1133,8 +1133,15 @@ static ssize_t _php_stream_write_buffer(php_stream *stream, const char *buf, siz stream->ops->seek(stream, stream->position, SEEK_SET, &stream->position); } + /* See GH-13071: userspace stream is subject to the memory limit. */ + size_t chunk_size = count; + if (php_stream_is(stream, PHP_STREAM_IS_USERSPACE)) { + /* If the stream is unbuffered, we can only write one byte at a time. */ + chunk_size = stream->chunk_size; + } + while (count > 0) { - ssize_t justwrote = stream->ops->write(stream, buf, count); + ssize_t justwrote = stream->ops->write(stream, buf, MIN(chunk_size, count)); if (justwrote <= 0) { /* If we already successfully wrote some bytes and a write error occurred * later, report the successfully written bytes. */ From 796803ad3df07d5263584b124cd58182d5fedead Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Tue, 16 Jan 2024 22:47:55 +0100 Subject: [PATCH 2/3] Add test --- ext/standard/tests/file/gh13136.phpt | 50 ++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 ext/standard/tests/file/gh13136.phpt diff --git a/ext/standard/tests/file/gh13136.phpt b/ext/standard/tests/file/gh13136.phpt new file mode 100644 index 0000000000000..5a6cd420fc81b --- /dev/null +++ b/ext/standard/tests/file/gh13136.phpt @@ -0,0 +1,50 @@ +--TEST-- +GH-13071 (Copying large files using mmap-able source streams may exhaust available memory and fail) +--FILE-- +trim_path($path); + $this->file = fopen($path, $mode); + return true; + } + + public function stream_close() { + fclose($this->file); + return true; + } + + public function stream_write($data) { + self::$writes++; + return fwrite($this->file, $data); + } + + public function url_stat($path, $flags) { + return false; + } + + private function trim_path(string $path): string { + return substr($path, strlen("up://")); + } +} + +file_put_contents(__DIR__ . "/gh13071.tmp", str_repeat("a", 1024 * 1024 * 8)); + +stream_wrapper_register("up", CustomStream::class, STREAM_IS_URL); + +$old_limit = ini_get("memory_limit"); +ini_set("memory_limit", memory_get_usage(true) + 5 * 1024 * 1024); +copy(__DIR__ . "/gh13071.tmp", "up://" . __DIR__ . "/gh13071.out.tmp"); +ini_set("memory_limit", $old_limit); + +echo "Done ", CustomStream::$writes, " writes\n"; + +?> +--EXPECT-- +Done 1024 writes From 3360d699b0c72c8844f0ccee62982e94e0a6e293 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Tue, 16 Jan 2024 22:48:50 +0100 Subject: [PATCH 3/3] Add clean section to test --- ext/standard/tests/file/gh13136.phpt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ext/standard/tests/file/gh13136.phpt b/ext/standard/tests/file/gh13136.phpt index 5a6cd420fc81b..a90c07c91c280 100644 --- a/ext/standard/tests/file/gh13136.phpt +++ b/ext/standard/tests/file/gh13136.phpt @@ -45,6 +45,11 @@ ini_set("memory_limit", $old_limit); echo "Done ", CustomStream::$writes, " writes\n"; +?> +--CLEAN-- + --EXPECT-- Done 1024 writes