Skip to content

Commit 5d52d47

Browse files
committed
Fix #69181: READ_CSV|DROP_NEW_LINE drops newlines within fields
One may argue that `DROP_NEW_LINE` does not make sense in combination with `READ_CSV`, but without `DROP_NEW_LINE`, `SKIP_EMPTY` does not skip empty lines at all. We could fix that, but do not for BC reasons. Instead we no longer drop newlines in `spl_filesystem_file_read_ex()` when reading CSV, but handle that in `spl_filesystem_file_read_csv()` by treating lines with only (CR)LF as being empty as well. Closes GH-7618.
1 parent 2fd5e82 commit 5d52d47

File tree

3 files changed

+93
-9
lines changed

3 files changed

+93
-9
lines changed

NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ PHP NEWS
3030
- Sockets:
3131
. Added SOL_FILTER socket option for Solaris. (David Carlier)
3232

33+
- SPL:
34+
. Fixed bug #69181 (READ_CSV|DROP_NEW_LINE drops newlines within fields).
35+
(cmb)
36+
3337
21 Jul 2022, PHP 8.2.0beta1
3438

3539
- CLI:

ext/spl/spl_directory.c

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1887,7 +1887,7 @@ static zend_result spl_filesystem_object_cast(zend_object *readobj, zval *writeo
18871887
}
18881888
/* }}} */
18891889

1890-
static zend_result spl_filesystem_file_read_ex(spl_filesystem_object *intern, bool silent, zend_long line_add) /* {{{ */
1890+
static zend_result spl_filesystem_file_read_ex(spl_filesystem_object *intern, bool silent, zend_long line_add, bool csv) /* {{{ */
18911891
{
18921892
char *buf;
18931893
size_t line_len = 0;
@@ -1917,7 +1917,7 @@ static zend_result spl_filesystem_file_read_ex(spl_filesystem_object *intern, bo
19171917
intern->u.file.current_line = estrdup("");
19181918
intern->u.file.current_line_len = 0;
19191919
} else {
1920-
if (SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_DROP_NEW_LINE)) {
1920+
if (!csv && SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_DROP_NEW_LINE)) {
19211921
if (line_len > 0 && buf[line_len - 1] == '\n') {
19221922
line_len--;
19231923
if (line_len > 0 && buf[line_len - 1] == '\r') {
@@ -1935,20 +1935,30 @@ static zend_result spl_filesystem_file_read_ex(spl_filesystem_object *intern, bo
19351935
return SUCCESS;
19361936
} /* }}} */
19371937

1938-
static inline zend_result spl_filesystem_file_read(spl_filesystem_object *intern, bool silent)
1938+
static inline zend_result spl_filesystem_file_read(spl_filesystem_object *intern, bool silent, bool csv)
19391939
{
19401940
zend_long line_add = (intern->u.file.current_line) ? 1 : 0;
1941-
return spl_filesystem_file_read_ex(intern, silent, line_add);
1941+
return spl_filesystem_file_read_ex(intern, silent, line_add, csv);
1942+
}
1943+
1944+
static bool is_line_empty(spl_filesystem_object *intern)
1945+
{
1946+
char *current_line = intern->u.file.current_line;
1947+
size_t current_line_len = intern->u.file.current_line_len;
1948+
return current_line_len == 0
1949+
|| ((SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_READ_CSV) && SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_DROP_NEW_LINE)
1950+
&& ((current_line_len == 1 && current_line[0] == '\n')
1951+
|| (current_line_len == 2 && current_line[0] == '\r' && current_line[1] == '\n'))));
19421952
}
19431953

19441954
static zend_result spl_filesystem_file_read_csv(spl_filesystem_object *intern, char delimiter, char enclosure, int escape, zval *return_value) /* {{{ */
19451955
{
19461956
do {
1947-
zend_result ret = spl_filesystem_file_read(intern, /* silent */ true);
1957+
zend_result ret = spl_filesystem_file_read(intern, /* silent */ true, /* csv */ true);
19481958
if (ret != SUCCESS) {
19491959
return ret;
19501960
}
1951-
} while (!intern->u.file.current_line_len && SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_SKIP_EMPTY));
1961+
} while (is_line_empty(intern) && SPL_HAS_FLAG(intern->flags, SPL_FILE_OBJECT_SKIP_EMPTY));
19521962

19531963
size_t buf_len = intern->u.file.current_line_len;
19541964
char *buf = estrndup(intern->u.file.current_line, buf_len);
@@ -2006,7 +2016,7 @@ static zend_result spl_filesystem_file_read_line_ex(zval * this_ptr, spl_filesys
20062016
zval_ptr_dtor(&retval);
20072017
return SUCCESS;
20082018
} else {
2009-
return spl_filesystem_file_read(intern, /* silent */ true);
2019+
return spl_filesystem_file_read(intern, /* silent */ true, /* csv */ false);
20102020
}
20112021
} /* }}} */
20122022

@@ -2214,7 +2224,7 @@ PHP_METHOD(SplFileObject, fgets)
22142224

22152225
CHECK_SPL_FILE_OBJECT_IS_INITIALIZED(intern);
22162226

2217-
if (spl_filesystem_file_read_ex(intern, /* silent */ false, /* line_add */ 1) == FAILURE) {
2227+
if (spl_filesystem_file_read_ex(intern, /* silent */ false, /* line_add */ 1, /* csv */ false) == FAILURE) {
22182228
RETURN_THROWS();
22192229
}
22202230
RETURN_STRINGL(intern->u.file.current_line, intern->u.file.current_line_len);
@@ -2644,7 +2654,7 @@ PHP_METHOD(SplFileObject, fscanf)
26442654
CHECK_SPL_FILE_OBJECT_IS_INITIALIZED(intern);
26452655

26462656
/* Get next line */
2647-
if (spl_filesystem_file_read(intern, /* silent */ false) == FAILURE) {
2657+
if (spl_filesystem_file_read(intern, /* silent */ false, /* csv */ false) == FAILURE) {
26482658
RETURN_THROWS();
26492659
}
26502660

ext/spl/tests/bug69181.phpt

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
--TEST--
2+
Bug #69181 (READ_CSV|DROP_NEW_LINE drops newlines within fields)
3+
--FILE--
4+
<?php
5+
$filename = __DIR__ . "/bug69181.csv";
6+
$csv = <<<CSV
7+
"foo\n\nbar\nbaz",qux
8+
9+
"foo\nbar\nbaz",qux
10+
CSV;
11+
12+
file_put_contents($filename, $csv);
13+
14+
$file = new SplFileObject($filename);
15+
$file->setFlags(SplFileObject::SKIP_EMPTY | SplFileObject::DROP_NEW_LINE | SplFileObject::READ_CSV);
16+
var_dump(iterator_to_array($file));
17+
18+
echo "\n====\n\n";
19+
20+
$file->rewind();
21+
while (($record = $file->fgetcsv())) {
22+
var_dump($record);
23+
}
24+
?>
25+
--EXPECT--
26+
array(2) {
27+
[0]=>
28+
array(2) {
29+
[0]=>
30+
string(12) "foo
31+
32+
bar
33+
baz"
34+
[1]=>
35+
string(3) "qux"
36+
}
37+
[2]=>
38+
array(2) {
39+
[0]=>
40+
string(11) "foo
41+
bar
42+
baz"
43+
[1]=>
44+
string(3) "qux"
45+
}
46+
}
47+
48+
====
49+
50+
array(2) {
51+
[0]=>
52+
string(12) "foo
53+
54+
bar
55+
baz"
56+
[1]=>
57+
string(3) "qux"
58+
}
59+
array(2) {
60+
[0]=>
61+
string(11) "foo
62+
bar
63+
baz"
64+
[1]=>
65+
string(3) "qux"
66+
}
67+
--CLEAN--
68+
<?php
69+
@unlink(__DIR__ . "/bug69181.csv");
70+
?>

0 commit comments

Comments
 (0)