Skip to content

Commit c8351a6

Browse files
gh-113796: Add more validation checks in the csv.Dialect constructor (GH-113797)
ValueError is now raised if the same character is used in different roles.
1 parent 2f2ddab commit c8351a6

File tree

3 files changed: +96 -13 lines changed

Lib/test/test_csv.py

Lines changed: 54 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -28,14 +28,20 @@ class Test_Csv(unittest.TestCase):
2828
in TestDialectRegistry.
2929
"""
3030
def _test_arg_valid(self, ctor, arg):
31+
ctor(arg)
3132
self.assertRaises(TypeError, ctor)
3233
self.assertRaises(TypeError, ctor, None)
33-
self.assertRaises(TypeError, ctor, arg, bad_attr = 0)
34-
self.assertRaises(TypeError, ctor, arg, delimiter = 0)
35-
self.assertRaises(TypeError, ctor, arg, delimiter = 'XX')
34+
self.assertRaises(TypeError, ctor, arg, bad_attr=0)
35+
self.assertRaises(TypeError, ctor, arg, delimiter='')
36+
self.assertRaises(TypeError, ctor, arg, escapechar='')
37+
self.assertRaises(TypeError, ctor, arg, quotechar='')
38+
self.assertRaises(TypeError, ctor, arg, delimiter='^^')
39+
self.assertRaises(TypeError, ctor, arg, escapechar='^^')
40+
self.assertRaises(TypeError, ctor, arg, quotechar='^^')
3641
self.assertRaises(csv.Error, ctor, arg, 'foo')
3742
self.assertRaises(TypeError, ctor, arg, delimiter=None)
3843
self.assertRaises(TypeError, ctor, arg, delimiter=1)
44+
self.assertRaises(TypeError, ctor, arg, escapechar=1)
3945
self.assertRaises(TypeError, ctor, arg, quotechar=1)
4046
self.assertRaises(TypeError, ctor, arg, lineterminator=None)
4147
self.assertRaises(TypeError, ctor, arg, lineterminator=1)
@@ -46,6 +52,40 @@ def _test_arg_valid(self, ctor, arg):
4652
quoting=csv.QUOTE_ALL, quotechar=None)
4753
self.assertRaises(TypeError, ctor, arg,
4854
quoting=csv.QUOTE_NONE, quotechar='')
55+
self.assertRaises(ValueError, ctor, arg, delimiter='\n')
56+
self.assertRaises(ValueError, ctor, arg, escapechar='\n')
57+
self.assertRaises(ValueError, ctor, arg, quotechar='\n')
58+
self.assertRaises(ValueError, ctor, arg, delimiter='\r')
59+
self.assertRaises(ValueError, ctor, arg, escapechar='\r')
60+
self.assertRaises(ValueError, ctor, arg, quotechar='\r')
61+
ctor(arg, delimiter=' ')
62+
ctor(arg, escapechar=' ')
63+
ctor(arg, quotechar=' ')
64+
ctor(arg, delimiter='\t', skipinitialspace=True)
65+
ctor(arg, escapechar='\t', skipinitialspace=True)
66+
ctor(arg, quotechar='\t', skipinitialspace=True)
67+
self.assertRaises(ValueError, ctor, arg,
68+
delimiter=' ', skipinitialspace=True)
69+
self.assertRaises(ValueError, ctor, arg,
70+
escapechar=' ', skipinitialspace=True)
71+
self.assertRaises(ValueError, ctor, arg,
72+
quotechar=' ', skipinitialspace=True)
73+
ctor(arg, delimiter='^')
74+
ctor(arg, escapechar='^')
75+
ctor(arg, quotechar='^')
76+
self.assertRaises(ValueError, ctor, arg, delimiter='^', escapechar='^')
77+
self.assertRaises(ValueError, ctor, arg, delimiter='^', quotechar='^')
78+
self.assertRaises(ValueError, ctor, arg, escapechar='^', quotechar='^')
79+
ctor(arg, delimiter='\x85')
80+
ctor(arg, escapechar='\x85')
81+
ctor(arg, quotechar='\x85')
82+
ctor(arg, lineterminator='\x85')
83+
self.assertRaises(ValueError, ctor, arg,
84+
delimiter='\x85', lineterminator='\x85')
85+
self.assertRaises(ValueError, ctor, arg,
86+
escapechar='\x85', lineterminator='\x85')
87+
self.assertRaises(ValueError, ctor, arg,
88+
quotechar='\x85', lineterminator='\x85')
4989

5090
def test_reader_arg_valid(self):
5191
self._test_arg_valid(csv.reader, [])
@@ -535,14 +575,6 @@ class unspecified():
535575
finally:
536576
csv.unregister_dialect('testC')
537577

538-
def test_bad_dialect(self):
539-
# Unknown parameter
540-
self.assertRaises(TypeError, csv.reader, [], bad_attr = 0)
541-
# Bad values
542-
self.assertRaises(TypeError, csv.reader, [], delimiter = None)
543-
self.assertRaises(TypeError, csv.reader, [], quoting = -1)
544-
self.assertRaises(TypeError, csv.reader, [], quoting = 100)
545-
546578
def test_copy(self):
547579
for name in csv.list_dialects():
548580
dialect = csv.get_dialect(name)
@@ -1088,10 +1120,15 @@ class mydialect(csv.Dialect):
10881120
'"lineterminator" must be a string')
10891121

10901122
def test_invalid_chars(self):
1091-
def create_invalid(field_name, value):
1123+
def create_invalid(field_name, value, **kwargs):
10921124
class mydialect(csv.Dialect):
1093-
pass
1125+
delimiter = ','
1126+
quoting = csv.QUOTE_ALL
1127+
quotechar = '"'
1128+
lineterminator = '\r\n'
10941129
setattr(mydialect, field_name, value)
1130+
for field_name, value in kwargs.items():
1131+
setattr(mydialect, field_name, value)
10951132
d = mydialect()
10961133

10971134
for field_name in ("delimiter", "escapechar", "quotechar"):
@@ -1100,6 +1137,10 @@ class mydialect(csv.Dialect):
11001137
self.assertRaises(csv.Error, create_invalid, field_name, "abc")
11011138
self.assertRaises(csv.Error, create_invalid, field_name, b'x')
11021139
self.assertRaises(csv.Error, create_invalid, field_name, 5)
1140+
self.assertRaises(ValueError, create_invalid, field_name, "\n")
1141+
self.assertRaises(ValueError, create_invalid, field_name, "\r")
1142+
self.assertRaises(ValueError, create_invalid, field_name, " ",
1143+
skipinitialspace=True)
11031144

11041145

11051146
class TestSniffer(unittest.TestCase):
Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
Add more validation checks in the :class:`csv.Dialect` constructor.
2+
:exc:`ValueError` is now raised if the same character is used in different
3+
roles.

Modules/_csv.c

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -331,6 +331,33 @@ dialect_check_quoting(int quoting)
331331
return -1;
332332
}
333333

334+
static int
335+
dialect_check_char(const char *name, Py_UCS4 c, DialectObj *dialect)
336+
{
337+
if (c == '\r' || c == '\n' || (dialect->skipinitialspace && c == ' ')) {
338+
PyErr_Format(PyExc_ValueError, "bad %s value", name);
339+
return -1;
340+
}
341+
if (PyUnicode_FindChar(
342+
dialect->lineterminator, c, 0,
343+
PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0)
344+
{
345+
PyErr_Format(PyExc_ValueError, "bad %s or lineterminator value", name);
346+
return -1;
347+
}
348+
return 0;
349+
}
350+
351+
static int
352+
dialect_check_chars(const char *name1, const char *name2, Py_UCS4 c1, Py_UCS4 c2)
353+
{
354+
if (c1 == c2 && c1 != NOT_SET) {
355+
PyErr_Format(PyExc_ValueError, "bad %s or %s value", name1, name2);
356+
return -1;
357+
}
358+
return 0;
359+
}
360+
334361
#define D_OFF(x) offsetof(DialectObj, x)
335362

336363
static struct PyMemberDef Dialect_memberlist[] = {
@@ -508,6 +535,18 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
508535
PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
509536
goto err;
510537
}
538+
if (dialect_check_char("delimiter", self->delimiter, self) ||
539+
dialect_check_char("escapechar", self->escapechar, self) ||
540+
dialect_check_char("quotechar", self->quotechar, self) ||
541+
dialect_check_chars("delimiter", "escapechar",
542+
self->delimiter, self->escapechar) ||
543+
dialect_check_chars("delimiter", "quotechar",
544+
self->delimiter, self->quotechar) ||
545+
dialect_check_chars("escapechar", "quotechar",
546+
self->escapechar, self->quotechar))
547+
{
548+
goto err;
549+
}
511550

512551
ret = Py_NewRef(self);
513552
err:

0 commit comments

Comments
 (0)