Skip to content

After #164 lazy DFA update, certain Regex fails to match #169

Closed
@jnicholls

Description

@jnicholls

The example code/regex below no longer matches after the new lazy DFA work:

let re = Regex::new(r"(?m:(\s*pub _bindgen_bitfield_\d+_: \w+,\s*\n)(\s*pub _bindgen_bitfield_\d+_: \w+,\s*\n)+)").unwrap();
let code = re.replace_all(&code, "$1");

The regex should match in the code below...

pub struct Struct_ndpi_flow_struct {
    pub detected_protocol_stack: [u_int16_t; 2usize],
    pub protocol_stack_info: u_int16_t,
    pub guessed_protocol_id: u_int16_t,
    pub guessed_host_proto_id: u_int16_t,
    pub _bindgen_bitfield_1_: u_int8_t,
    pub _bindgen_bitfield_2_: u_int8_t,
    pub _bindgen_bitfield_3_: u_int8_t,
    pub _bindgen_bitfield_4_: u_int8_t,
    pub _bindgen_bitfield_5_: u_int8_t,
    pub next_tcp_seq_nr: [u_int32_t; 2usize],
    pub l4: Union_Unnamed72,
    pub server_id: *mut Struct_ndpi_id_struct,
    pub host_server_name: [u_char; 256usize],
    pub detected_os: [u_char; 32usize],
    pub nat_ip: [u_char; 24usize],
    pub http: Struct_Unnamed73,
    pub protos: Union_Unnamed74,
    pub excluded_protocol_bitmask: ndpi_protocol_bitmask_struct_t,
    pub num_stun_udp_pkts: u_int8_t,
    pub redis_s2d_first_char: u_int8_t,
    pub redis_d2s_first_char: u_int8_t,
    pub packet_counter: u_int16_t,
    pub packet_direction_counter: [u_int16_t; 2usize],
    pub byte_counter: [u_int16_t; 2usize],
    pub bittorrent_stage: u_int8_t,
    pub _bindgen_bitfield_6_: u_int32_t,
    pub _bindgen_bitfield_7_: u_int32_t,
    pub _bindgen_bitfield_8_: u_int32_t,
    pub _bindgen_bitfield_9_: u_int32_t,
    pub _bindgen_bitfield_10_: u_int32_t,
    pub _bindgen_bitfield_11_: u_int32_t,
    pub _bindgen_bitfield_12_: u_int32_t,
    pub _bindgen_bitfield_13_: u_int32_t,
    pub _bindgen_bitfield_14_: u_int32_t,
    pub _bindgen_bitfield_15_: u_int32_t,
    pub _bindgen_bitfield_16_: u_int32_t,
    pub _bindgen_bitfield_17_: u_int32_t,
    pub _bindgen_bitfield_18_: u_int32_t,
    pub _bindgen_bitfield_19_: u_int32_t,
    pub _bindgen_bitfield_20_: u_int32_t,
    pub _bindgen_bitfield_21_: u_int32_t,
    pub _bindgen_bitfield_22_: u_int32_t,
    pub _bindgen_bitfield_23_: u_int32_t,
    pub _bindgen_bitfield_24_: u_int32_t,
    pub _bindgen_bitfield_25_: u_int32_t,
    pub _bindgen_bitfield_26_: u_int32_t,
    pub _bindgen_bitfield_27_: u_int32_t,
    pub _bindgen_bitfield_28_: u_int32_t,
    pub _bindgen_bitfield_29_: u_int32_t,
    pub _bindgen_bitfield_30_: u_int32_t,
    pub _bindgen_bitfield_31_: u_int32_t,
    pub packet: Struct_ndpi_packet_struct,
    pub flow: *mut Struct_ndpi_flow_struct,
    pub src: *mut Struct_ndpi_id_struct,
    pub dst: *mut Struct_ndpi_id_struct,
}

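...and the code should become the following after replacement (each run of consecutive `_bindgen_bitfield_N_` fields collapsed to its first field):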

pub struct Struct_ndpi_flow_struct {
    pub detected_protocol_stack: [u_int16_t; 2usize],
    pub protocol_stack_info: u_int16_t,
    pub guessed_protocol_id: u_int16_t,
    pub guessed_host_proto_id: u_int16_t,
    pub _bindgen_bitfield_1_: u_int8_t,
    pub next_tcp_seq_nr: [u_int32_t; 2usize],
    pub l4: Union_Unnamed72,
    pub server_id: *mut Struct_ndpi_id_struct,
    pub host_server_name: [u_char; 256usize],
    pub detected_os: [u_char; 32usize],
    pub nat_ip: [u_char; 24usize],
    pub http: Struct_Unnamed73,
    pub protos: Union_Unnamed74,
    pub excluded_protocol_bitmask: ndpi_protocol_bitmask_struct_t,
    pub num_stun_udp_pkts: u_int8_t,
    pub redis_s2d_first_char: u_int8_t,
    pub redis_d2s_first_char: u_int8_t,
    pub packet_counter: u_int16_t,
    pub packet_direction_counter: [u_int16_t; 2usize],
    pub byte_counter: [u_int16_t; 2usize],
    pub bittorrent_stage: u_int8_t,
    pub _bindgen_bitfield_6_: u_int32_t,
    pub packet: Struct_ndpi_packet_struct,
    pub flow: *mut Struct_ndpi_flow_struct,
    pub src: *mut Struct_ndpi_id_struct,
    pub dst: *mut Struct_ndpi_id_struct,
}

This code works around an issue with rust-bindgen by replacing the extra generated bitfields.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions