1
- // Copyright (C) 2020-2023 Jonathan Müller and lexy contributors
1
+ // Copyright (C) 2020-2024 Jonathan Müller and lexy contributors
2
2
// SPDX-License-Identifier: BSL-1.0
3
3
4
4
#ifndef LEXY_DETAIL_CODE_POINT_HPP_INCLUDED
@@ -133,9 +133,9 @@ enum class cp_error
133
133
// Result of attempting to decode one code point from a reader of type `Reader`.
// Returned by parse_code_point() and consumed by recover_code_point().
// NOTE: this span is a diff rendering; `-` lines are the old fields, `+` lines the new ones.
template <typename Reader>
134
134
struct cp_result
135
135
{
136
- char32_t cp;
137
- cp_error error;
138
- typename Reader::iterator end;
136
// Decoded code point value; value-initialized (`{}`) by the error paths that decode nothing.
+ char32_t cp;
137
// Status of the decode: cp_error::success or the specific failure encountered.
+ cp_error error;
138
// Reader state captured with reader.current() where parsing stopped; recover_code_point()
// passes it to reader.reset(). Previously a Reader::iterator obtained from reader.position().
+ typename Reader::marker end;
139
139
};
140
140
141
141
template <typename Reader>
@@ -144,16 +144,16 @@ constexpr cp_result<Reader> parse_code_point(Reader reader)
144
144
if constexpr (std::is_same_v<typename Reader::encoding, lexy::ascii_encoding>)
145
145
{
146
146
if (reader.peek () == Reader::encoding::eof ())
147
- return {{}, cp_error::eof, reader.position ()};
147
+ return {{}, cp_error::eof, reader.current ()};
148
148
149
149
auto cur = reader.peek ();
150
150
reader.bump ();
151
151
152
152
auto cp = static_cast <char32_t >(cur);
153
153
if (cp <= 0x7F )
154
- return {cp, cp_error::success, reader.position ()};
154
+ return {cp, cp_error::success, reader.current ()};
155
155
else
156
- return {cp, cp_error::out_of_range, reader.position ()};
156
+ return {cp, cp_error::out_of_range, reader.current ()};
157
157
}
158
158
else if constexpr (std::is_same_v<typename Reader::encoding, lexy::utf8_encoding> //
159
159
|| std::is_same_v<typename Reader::encoding, lexy::utf8_char_encoding>)
@@ -176,19 +176,19 @@ constexpr cp_result<Reader> parse_code_point(Reader reader)
176
176
{
177
177
// ASCII character.
178
178
reader.bump ();
179
- return {first, cp_error::success, reader.position ()};
179
+ return {first, cp_error::success, reader.current ()};
180
180
}
181
181
else if ((first & ~payload_cont) == pattern_cont)
182
182
{
183
- return {{}, cp_error::leads_with_trailing, reader.position ()};
183
+ return {{}, cp_error::leads_with_trailing, reader.current ()};
184
184
}
185
185
else if ((first & ~payload_lead2) == pattern_lead2)
186
186
{
187
187
reader.bump ();
188
188
189
189
auto second = uchar_t (reader.peek ());
190
190
if ((second & ~payload_cont) != pattern_cont)
191
- return {{}, cp_error::missing_trailing, reader.position ()};
191
+ return {{}, cp_error::missing_trailing, reader.current ()};
192
192
reader.bump ();
193
193
194
194
auto result = char32_t (first & payload_lead2);
@@ -197,22 +197,22 @@ constexpr cp_result<Reader> parse_code_point(Reader reader)
197
197
198
198
// C0 and C1 are overlong ASCII.
199
199
if (first == 0xC0 || first == 0xC1 )
200
- return {result, cp_error::overlong_sequence, reader.position ()};
200
+ return {result, cp_error::overlong_sequence, reader.current ()};
201
201
else
202
- return {result, cp_error::success, reader.position ()};
202
+ return {result, cp_error::success, reader.current ()};
203
203
}
204
204
else if ((first & ~payload_lead3) == pattern_lead3)
205
205
{
206
206
reader.bump ();
207
207
208
208
auto second = uchar_t (reader.peek ());
209
209
if ((second & ~payload_cont) != pattern_cont)
210
- return {{}, cp_error::missing_trailing, reader.position ()};
210
+ return {{}, cp_error::missing_trailing, reader.current ()};
211
211
reader.bump ();
212
212
213
213
auto third = uchar_t (reader.peek ());
214
214
if ((third & ~payload_cont) != pattern_cont)
215
- return {{}, cp_error::missing_trailing, reader.position ()};
215
+ return {{}, cp_error::missing_trailing, reader.current ()};
216
216
reader.bump ();
217
217
218
218
auto result = char32_t (first & payload_lead3);
@@ -223,29 +223,29 @@ constexpr cp_result<Reader> parse_code_point(Reader reader)
223
223
224
224
auto cp = result;
225
225
if (0xD800 <= cp && cp <= 0xDFFF )
226
- return {cp, cp_error::surrogate, reader.position ()};
226
+ return {cp, cp_error::surrogate, reader.current ()};
227
227
else if (first == 0xE0 && second < 0xA0 )
228
- return {cp, cp_error::overlong_sequence, reader.position ()};
228
+ return {cp, cp_error::overlong_sequence, reader.current ()};
229
229
else
230
- return {cp, cp_error::success, reader.position ()};
230
+ return {cp, cp_error::success, reader.current ()};
231
231
}
232
232
else if ((first & ~payload_lead4) == pattern_lead4)
233
233
{
234
234
reader.bump ();
235
235
236
236
auto second = uchar_t (reader.peek ());
237
237
if ((second & ~payload_cont) != pattern_cont)
238
- return {{}, cp_error::missing_trailing, reader.position ()};
238
+ return {{}, cp_error::missing_trailing, reader.current ()};
239
239
reader.bump ();
240
240
241
241
auto third = uchar_t (reader.peek ());
242
242
if ((third & ~payload_cont) != pattern_cont)
243
- return {{}, cp_error::missing_trailing, reader.position ()};
243
+ return {{}, cp_error::missing_trailing, reader.current ()};
244
244
reader.bump ();
245
245
246
246
auto fourth = uchar_t (reader.peek ());
247
247
if ((fourth & ~payload_cont) != pattern_cont)
248
- return {{}, cp_error::missing_trailing, reader.position ()};
248
+ return {{}, cp_error::missing_trailing, reader.current ()};
249
249
reader.bump ();
250
250
251
251
auto result = char32_t (first & payload_lead4);
@@ -258,15 +258,15 @@ constexpr cp_result<Reader> parse_code_point(Reader reader)
258
258
259
259
auto cp = result;
260
260
if (cp > 0x10'FFFF )
261
- return {cp, cp_error::out_of_range, reader.position ()};
261
+ return {cp, cp_error::out_of_range, reader.current ()};
262
262
else if (first == 0xF0 && second < 0x90 )
263
- return {cp, cp_error::overlong_sequence, reader.position ()};
263
+ return {cp, cp_error::overlong_sequence, reader.current ()};
264
264
else
265
- return {cp, cp_error::success, reader.position ()};
265
+ return {cp, cp_error::success, reader.current ()};
266
266
}
267
267
else // FE or FF
268
268
{
269
- return {{}, cp_error::eof, reader.position ()};
269
+ return {{}, cp_error::eof, reader.current ()};
270
270
}
271
271
}
272
272
else if constexpr (std::is_same_v<typename Reader::encoding, lexy::utf16_encoding>)
@@ -278,53 +278,53 @@ constexpr cp_result<Reader> parse_code_point(Reader reader)
278
278
constexpr auto pattern2 = 0b110111 << 10 ;
279
279
280
280
if (reader.peek () == Reader::encoding::eof ())
281
- return {{}, cp_error::eof, reader.position ()};
281
+ return {{}, cp_error::eof, reader.current ()};
282
282
283
283
auto first = char16_t (reader.peek ());
284
284
if ((first & ~payload1) == pattern1)
285
285
{
286
286
reader.bump ();
287
287
if (reader.peek () == Reader::encoding::eof ())
288
- return {{}, cp_error::missing_trailing, reader.position ()};
288
+ return {{}, cp_error::missing_trailing, reader.current ()};
289
289
290
290
auto second = char16_t (reader.peek ());
291
291
if ((second & ~payload2) != pattern2)
292
- return {{}, cp_error::missing_trailing, reader.position ()};
292
+ return {{}, cp_error::missing_trailing, reader.current ()};
293
293
reader.bump ();
294
294
295
295
// We've got a valid code point.
296
296
auto result = char32_t (first & payload1);
297
297
result <<= 10 ;
298
298
result |= char32_t (second & payload2);
299
299
result |= 0x10000 ;
300
- return {result, cp_error::success, reader.position ()};
300
+ return {result, cp_error::success, reader.current ()};
301
301
}
302
302
else if ((first & ~payload2) == pattern2)
303
303
{
304
- return {{}, cp_error::leads_with_trailing, reader.position ()};
304
+ return {{}, cp_error::leads_with_trailing, reader.current ()};
305
305
}
306
306
else
307
307
{
308
308
// Single code unit code point; always valid.
309
309
reader.bump ();
310
- return {first, cp_error::success, reader.position ()};
310
+ return {first, cp_error::success, reader.current ()};
311
311
}
312
312
}
313
313
else if constexpr (std::is_same_v<typename Reader::encoding, lexy::utf32_encoding>)
314
314
{
315
315
if (reader.peek () == Reader::encoding::eof ())
316
- return {{}, cp_error::eof, reader.position ()};
316
+ return {{}, cp_error::eof, reader.current ()};
317
317
318
318
auto cur = reader.peek ();
319
319
reader.bump ();
320
320
321
321
auto cp = cur;
322
322
if (cp > 0x10'FFFF )
323
- return {cp, cp_error::out_of_range, reader.position ()};
323
+ return {cp, cp_error::out_of_range, reader.current ()};
324
324
else if (0xD800 <= cp && cp <= 0xDFFF )
325
- return {cp, cp_error::surrogate, reader.position ()};
325
+ return {cp, cp_error::surrogate, reader.current ()};
326
326
else
327
- return {cp, cp_error::success, reader.position ()};
327
+ return {cp, cp_error::success, reader.current ()};
328
328
}
329
329
else
330
330
{
@@ -341,15 +341,15 @@ constexpr void recover_code_point(Reader& reader, cp_result<Reader> result)
341
341
{
342
342
case cp_error::success:
343
343
// Consume the entire code point.
344
- reader.set_position (result.end );
344
+ reader.reset (result.end );
345
345
break ;
346
346
case cp_error::eof:
347
347
// We don't need to do anything to "recover" from EOF.
348
348
break ;
349
349
350
350
case cp_error::leads_with_trailing:
351
351
// Invalid code unit, consume to recover.
352
- LEXY_PRECONDITION (result.end == reader.position ());
352
+ LEXY_PRECONDITION (result.end . position () == reader.position ());
353
353
reader.bump ();
354
354
break ;
355
355
@@ -358,7 +358,7 @@ constexpr void recover_code_point(Reader& reader, cp_result<Reader> result)
358
358
case cp_error::out_of_range:
359
359
case cp_error::overlong_sequence:
360
360
// Consume all the invalid code units to recover.
361
- reader.set_position (result.end );
361
+ reader.reset (result.end );
362
362
break ;
363
363
}
364
364
}
0 commit comments