Skip to content

Commit d93350a

Browse files
committed
syntax: add new 'arbitrary' crate feature
This feature makes all of the AST types derive the 'Arbitrary' trait, which is in turn quite useful for fuzz testing.
1 parent 3cb8918 commit d93350a

File tree

3 files changed

+51
-0
lines changed

3 files changed

+51
-0
lines changed

regex-syntax/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ rust-version = "1.60.0"
1515
[features]
1616
default = ["std", "unicode"]
1717
std = []
18+
arbitrary = ["dep:arbitrary"]
1819

1920
unicode = [
2021
"unicode-age",
@@ -33,6 +34,9 @@ unicode-perl = []
3334
unicode-script = []
3435
unicode-segment = []
3536

37+
[dependencies]
38+
arbitrary = { version = "1.3.0", features = ["derive"], optional = true }
39+
3640
[package.metadata.docs.rs]
3741
# We want to document all features.
3842
all-features = true

regex-syntax/src/ast/mod.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ mod visitor;
2020
/// valid Unicode property name. That particular error is reported when
2121
/// translating an AST to the high-level intermediate representation (`HIR`).
2222
#[derive(Clone, Debug, Eq, PartialEq)]
23+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
2324
pub struct Error {
2425
/// The kind of error.
2526
kind: ErrorKind,
@@ -70,6 +71,7 @@ impl Error {
7071
/// new variant is not considered a breaking change.
7172
#[non_exhaustive]
7273
#[derive(Clone, Debug, Eq, PartialEq)]
74+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
7375
pub enum ErrorKind {
7476
/// The capturing group limit was exceeded.
7577
///
@@ -278,6 +280,7 @@ impl core::fmt::Display for ErrorKind {
278280
/// All span positions are absolute byte offsets that can be used on the
279281
/// original regular expression that was parsed.
280282
#[derive(Clone, Copy, Eq, PartialEq)]
283+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
281284
pub struct Span {
282285
/// The start byte offset.
283286
pub start: Position,
@@ -308,6 +311,7 @@ impl PartialOrd for Span {
308311
/// A position encodes one half of a span, and includes the byte offset, line
309312
/// number and column number.
310313
#[derive(Clone, Copy, Eq, PartialEq)]
314+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
311315
pub struct Position {
312316
/// The absolute offset of this position, starting at `0` from the
313317
/// beginning of the regular expression pattern string.
@@ -396,6 +400,7 @@ impl Position {
396400
/// comment contains a span of precisely where it occurred in the original
397401
/// regular expression.
398402
#[derive(Clone, Debug, Eq, PartialEq)]
403+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
399404
pub struct WithComments {
400405
/// The actual ast.
401406
pub ast: Ast,
@@ -408,6 +413,7 @@ pub struct WithComments {
408413
/// A regular expression can only contain comments when the `x` flag is
409414
/// enabled.
410415
#[derive(Clone, Debug, Eq, PartialEq)]
416+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
411417
pub struct Comment {
412418
/// The span of this comment, including the beginning `#` and ending `\n`.
413419
pub span: Span,
@@ -424,6 +430,7 @@ pub struct Comment {
424430
/// This type defines its own destructor that uses constant stack space and
425431
/// heap space proportional to the size of the `Ast`.
426432
#[derive(Clone, Debug, Eq, PartialEq)]
433+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
427434
pub enum Ast {
428435
/// An empty regex that matches everything.
429436
Empty(Span),
@@ -508,6 +515,7 @@ impl core::fmt::Display for Ast {
508515

509516
/// An alternation of regular expressions.
510517
#[derive(Clone, Debug, Eq, PartialEq)]
518+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
511519
pub struct Alternation {
512520
/// The span of this alternation.
513521
pub span: Span,
@@ -532,6 +540,7 @@ impl Alternation {
532540

533541
/// A concatenation of regular expressions.
534542
#[derive(Clone, Debug, Eq, PartialEq)]
543+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
535544
pub struct Concat {
536545
/// The span of this concatenation.
537546
pub span: Span,
@@ -560,6 +569,7 @@ impl Concat {
560569
/// represented in their literal form, e.g., `a` or in their escaped form,
561570
/// e.g., `\x61`.
562571
#[derive(Clone, Debug, Eq, PartialEq)]
572+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
563573
pub struct Literal {
564574
/// The span of this literal.
565575
pub span: Span,
@@ -584,6 +594,7 @@ impl Literal {
584594

585595
/// The kind of a single literal expression.
586596
#[derive(Clone, Debug, Eq, PartialEq)]
597+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
587598
pub enum LiteralKind {
588599
/// The literal is written verbatim, e.g., `a` or `☃`.
589600
Verbatim,
@@ -613,6 +624,7 @@ pub enum LiteralKind {
613624
/// A special literal is a special escape sequence recognized by the regex
614625
/// parser, e.g., `\f` or `\n`.
615626
#[derive(Clone, Debug, Eq, PartialEq)]
627+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
616628
pub enum SpecialLiteralKind {
617629
/// Bell, spelled `\a` (`\x07`).
618630
Bell,
@@ -637,6 +649,7 @@ pub enum SpecialLiteralKind {
637649
/// differ when used without brackets in the number of hex digits that must
638650
/// follow.
639651
#[derive(Clone, Debug, Eq, PartialEq)]
652+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
640653
pub enum HexLiteralKind {
641654
/// A `\x` prefix. When used without brackets, this form is limited to
642655
/// two digits.
@@ -664,6 +677,7 @@ impl HexLiteralKind {
664677

665678
/// A single character class expression.
666679
#[derive(Clone, Debug, Eq, PartialEq)]
680+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
667681
pub enum Class {
668682
/// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
669683
Unicode(ClassUnicode),
@@ -688,6 +702,7 @@ impl Class {
688702

689703
/// A Perl character class.
690704
#[derive(Clone, Debug, Eq, PartialEq)]
705+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
691706
pub struct ClassPerl {
692707
/// The span of this class.
693708
pub span: Span,
@@ -700,6 +715,7 @@ pub struct ClassPerl {
700715

701716
/// The available Perl character classes.
702717
#[derive(Clone, Debug, Eq, PartialEq)]
718+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
703719
pub enum ClassPerlKind {
704720
/// Decimal numbers.
705721
Digit,
@@ -711,6 +727,7 @@ pub enum ClassPerlKind {
711727

712728
/// An ASCII character class.
713729
#[derive(Clone, Debug, Eq, PartialEq)]
730+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
714731
pub struct ClassAscii {
715732
/// The span of this class.
716733
pub span: Span,
@@ -723,6 +740,7 @@ pub struct ClassAscii {
723740

724741
/// The available ASCII character classes.
725742
#[derive(Clone, Debug, Eq, PartialEq)]
743+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
726744
pub enum ClassAsciiKind {
727745
/// `[0-9A-Za-z]`
728746
Alnum,
@@ -786,6 +804,7 @@ impl ClassAsciiKind {
786804

787805
/// A Unicode character class.
788806
#[derive(Clone, Debug, Eq, PartialEq)]
807+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
789808
pub struct ClassUnicode {
790809
/// The span of this class.
791810
pub span: Span,
@@ -821,6 +840,7 @@ impl ClassUnicode {
821840

822841
/// The available forms of Unicode character classes.
823842
#[derive(Clone, Debug, Eq, PartialEq)]
843+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
824844
pub enum ClassUnicodeKind {
825845
/// A one letter abbreviated class, e.g., `\pN`.
826846
OneLetter(char),
@@ -840,6 +860,7 @@ pub enum ClassUnicodeKind {
840860

841861
/// The type of op used in a Unicode character class.
842862
#[derive(Clone, Debug, Eq, PartialEq)]
863+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
843864
pub enum ClassUnicodeOpKind {
844865
/// A property set to a specific value, e.g., `\p{scx=Katakana}`.
845866
Equal,
@@ -862,6 +883,7 @@ impl ClassUnicodeOpKind {
862883

863884
/// A bracketed character class, e.g., `[a-z0-9]`.
864885
#[derive(Clone, Debug, Eq, PartialEq)]
886+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
865887
pub struct ClassBracketed {
866888
/// The span of this class.
867889
pub span: Span,
@@ -880,6 +902,7 @@ pub struct ClassBracketed {
880902
/// items (literals, ranges, other bracketed classes) or a tree of binary set
881903
/// operations.
882904
#[derive(Clone, Debug, Eq, PartialEq)]
905+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
883906
pub enum ClassSet {
884907
/// An item, which can be a single literal, range, nested character class
885908
/// or a union of items.
@@ -913,6 +936,7 @@ impl ClassSet {
913936

914937
/// A single component of a character class set.
915938
#[derive(Clone, Debug, Eq, PartialEq)]
939+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
916940
pub enum ClassSetItem {
917941
/// An empty item.
918942
///
@@ -956,6 +980,7 @@ impl ClassSetItem {
956980

957981
/// A single character class range in a set.
958982
#[derive(Clone, Debug, Eq, PartialEq)]
983+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
959984
pub struct ClassSetRange {
960985
/// The span of this range.
961986
pub span: Span,
@@ -977,6 +1002,7 @@ impl ClassSetRange {
9771002

9781003
/// A union of items inside a character class set.
9791004
#[derive(Clone, Debug, Eq, PartialEq)]
1005+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
9801006
pub struct ClassSetUnion {
9811007
/// The span of the items in this operation. e.g., the `a-z0-9` in
9821008
/// `[^a-z0-9]`
@@ -1021,6 +1047,7 @@ impl ClassSetUnion {
10211047

10221048
/// A Unicode character class set operation.
10231049
#[derive(Clone, Debug, Eq, PartialEq)]
1050+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
10241051
pub struct ClassSetBinaryOp {
10251052
/// The span of this operation. e.g., the `a-z--h-p` in `[a-z--h-p]`.
10261053
pub span: Span,
@@ -1038,6 +1065,7 @@ pub struct ClassSetBinaryOp {
10381065
/// explicit union operator. Concatenation inside a character class corresponds
10391066
/// to the union operation.
10401067
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1068+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
10411069
pub enum ClassSetBinaryOpKind {
10421070
/// The intersection of two sets, e.g., `\pN&&[a-z]`.
10431071
Intersection,
@@ -1051,6 +1079,7 @@ pub enum ClassSetBinaryOpKind {
10511079

10521080
/// A single zero-width assertion.
10531081
#[derive(Clone, Debug, Eq, PartialEq)]
1082+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
10541083
pub struct Assertion {
10551084
/// The span of this assertion.
10561085
pub span: Span,
@@ -1060,6 +1089,7 @@ pub struct Assertion {
10601089

10611090
/// An assertion kind.
10621091
#[derive(Clone, Debug, Eq, PartialEq)]
1092+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
10631093
pub enum AssertionKind {
10641094
/// `^`
10651095
StartLine,
@@ -1077,6 +1107,7 @@ pub enum AssertionKind {
10771107

10781108
/// A repetition operation applied to a regular expression.
10791109
#[derive(Clone, Debug, Eq, PartialEq)]
1110+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
10801111
pub struct Repetition {
10811112
/// The span of this operation.
10821113
pub span: Span,
@@ -1090,6 +1121,7 @@ pub struct Repetition {
10901121

10911122
/// The repetition operator itself.
10921123
#[derive(Clone, Debug, Eq, PartialEq)]
1124+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
10931125
pub struct RepetitionOp {
10941126
/// The span of this operator. This includes things like `+`, `*?` and
10951127
/// `{m,n}`.
@@ -1100,6 +1132,7 @@ pub struct RepetitionOp {
11001132

11011133
/// The kind of a repetition operator.
11021134
#[derive(Clone, Debug, Eq, PartialEq)]
1135+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
11031136
pub enum RepetitionKind {
11041137
/// `?`
11051138
ZeroOrOne,
@@ -1113,6 +1146,7 @@ pub enum RepetitionKind {
11131146

11141147
/// A range repetition operator.
11151148
#[derive(Clone, Debug, Eq, PartialEq)]
1149+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
11161150
pub enum RepetitionRange {
11171151
/// `{m}`
11181152
Exactly(u32),
@@ -1142,6 +1176,7 @@ impl RepetitionRange {
11421176
/// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
11431177
/// `(?is:a)`.
11441178
#[derive(Clone, Debug, Eq, PartialEq)]
1179+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
11451180
pub struct Group {
11461181
/// The span of this group.
11471182
pub span: Span,
@@ -1183,6 +1218,7 @@ impl Group {
11831218

11841219
/// The kind of a group.
11851220
#[derive(Clone, Debug, Eq, PartialEq)]
1221+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
11861222
pub enum GroupKind {
11871223
/// `(a)`
11881224
CaptureIndex(u32),
@@ -1202,6 +1238,7 @@ pub enum GroupKind {
12021238
/// This corresponds to the name itself between the angle brackets in, e.g.,
12031239
/// `(?P<foo>expr)`.
12041240
#[derive(Clone, Debug, Eq, PartialEq)]
1241+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
12051242
pub struct CaptureName {
12061243
/// The span of this capture name.
12071244
pub span: Span,
@@ -1213,6 +1250,7 @@ pub struct CaptureName {
12131250

12141251
/// A group of flags that is not applied to a particular regular expression.
12151252
#[derive(Clone, Debug, Eq, PartialEq)]
1253+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
12161254
pub struct SetFlags {
12171255
/// The span of these flags, including the grouping parentheses.
12181256
pub span: Span,
@@ -1224,6 +1262,7 @@ pub struct SetFlags {
12241262
///
12251263
/// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
12261264
#[derive(Clone, Debug, Eq, PartialEq)]
1265+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
12271266
pub struct Flags {
12281267
/// The span of this group of flags.
12291268
pub span: Span,
@@ -1276,6 +1315,7 @@ impl Flags {
12761315

12771316
/// A single item in a group of flags.
12781317
#[derive(Clone, Debug, Eq, PartialEq)]
1318+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
12791319
pub struct FlagsItem {
12801320
/// The span of this item.
12811321
pub span: Span,
@@ -1285,6 +1325,7 @@ pub struct FlagsItem {
12851325

12861326
/// The kind of an item in a group of flags.
12871327
#[derive(Clone, Debug, Eq, PartialEq)]
1328+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
12881329
pub enum FlagsItemKind {
12891330
/// A negation operator applied to all subsequent flags in the enclosing
12901331
/// group.
@@ -1305,6 +1346,7 @@ impl FlagsItemKind {
13051346

13061347
/// A single flag.
13071348
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1349+
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
13081350
pub enum Flag {
13091351
/// `i`
13101352
CaseInsensitive,

regex-syntax/src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,11 @@ The following features are available:
157157
[Unicode text segmentation algorithms](https://www.unicode.org/reports/tr29/).
158158
This enables using classes like `\p{gcb=Extend}`, `\p{wb=Katakana}` and
159159
`\p{sb=ATerm}`.
160+
* **arbitrary** -
161+
Enabling this feature introduces a public dependency on the
162+
[`arbitrary`](https://crates.io/crates/arbitrary)
163+
crate. Namely, it implements the `Arbitrary` trait from that crate for the
164+
[`Ast`](crate::ast::Ast) type and the AST types it contains. This feature is disabled by default.
160165
*/
161166

162167
#![no_std]

0 commit comments

Comments
 (0)