diff --git a/src/libregex/lib.rs b/src/libregex/lib.rs index 4cb51361f27fb..3e100b7889cd1 100644 --- a/src/libregex/lib.rs +++ b/src/libregex/lib.rs @@ -401,7 +401,7 @@ pub mod native { // undesirable consequences (such as requiring a dependency on // `libsyntax`). // - // Secondly, the code generated generated by `regex!` must *also* be able + // Secondly, the code generated by `regex!` must *also* be able // to access various functions in this crate to reduce code duplication // and to provide a value with precisely the same `Regex` type in this // crate. This, AFAIK, is impossible to mitigate. diff --git a/src/libregex/re.rs b/src/libregex/re.rs index f80327c5ec789..91b24888c24fa 100644 --- a/src/libregex/re.rs +++ b/src/libregex/re.rs @@ -100,38 +100,45 @@ pub fn is_match(regex: &str, text: &str) -> Result<bool, parse::Error> { /// documentation. #[deriving(Clone)] #[allow(visible_private_types)] -pub struct Regex { - /// The representation of `Regex` is exported to support the `regex!` - /// syntax extension. Do not rely on it. - /// - /// See the comments for the `program` module in `lib.rs` for a more - /// detailed explanation for what `regex!` requires. +pub enum Regex { + // The representation of `Regex` is exported to support the `regex!` + // syntax extension. Do not rely on it. + // + // See the comments for the `program` module in `lib.rs` for a more + // detailed explanation for what `regex!` requires. #[doc(hidden)] - pub original: String, + Dynamic(Dynamic), #[doc(hidden)] - pub names: Vec<Option<String>>, + Native(Native), +} + +#[deriving(Clone)] +#[doc(hidden)] +pub struct Dynamic { + original: String, + names: Vec<Option<String>>, #[doc(hidden)] - pub p: MaybeNative, + pub prog: Program } -impl fmt::Show for Regex { - /// Shows the original regular expression.
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.original) - } +#[doc(hidden)] +pub struct Native { + #[doc(hidden)] + pub original: &'static str, + #[doc(hidden)] + pub names: &'static [Option<&'static str>], + #[doc(hidden)] + pub prog: fn(MatchKind, &str, uint, uint) -> Vec<Option<uint>> } -pub enum MaybeNative { - Dynamic(Program), - Native(fn(MatchKind, &str, uint, uint) -> Vec<Option<uint>>), +impl Clone for Native { + fn clone(&self) -> Native { *self } } -impl Clone for MaybeNative { - fn clone(&self) -> MaybeNative { - match *self { - Dynamic(ref p) => Dynamic(p.clone()), - Native(fp) => Native(fp), - } +impl fmt::Show for Regex { + /// Shows the original regular expression. + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.as_str()) } } @@ -146,10 +153,11 @@ impl Regex { pub fn new(re: &str) -> Result<Regex, parse::Error> { let ast = try!(parse::parse(re)); let (prog, names) = Program::new(ast); - Ok(Regex { + Ok(Dynamic(Dynamic { original: re.to_strbuf(), - names: names, p: Dynamic(prog), - }) + names: names, + prog: prog, + })) } /// Returns true if and only if the regex matches the string given. @@ -495,6 +503,46 @@ impl Regex { } new.append(text.slice(last_match, text.len())) } + + /// Returns the original string of this regex. + pub fn as_str<'a>(&'a self) -> &'a str { + match *self { + Dynamic(Dynamic { ref original, .. }) => original.as_slice(), + Native(Native { ref original, ..
}) => original.as_slice(), + } + } + + #[doc(hidden)] + #[allow(visible_private_types)] + #[experimental] + pub fn names_iter<'a>(&'a self) -> NamesIter<'a> { + match *self { + Native(ref n) => NamesIterNative(n.names.iter()), + Dynamic(ref d) => NamesIterDynamic(d.names.iter()) + } + } + + fn names_len(&self) -> uint { + match *self { + Native(ref n) => n.names.len(), + Dynamic(ref d) => d.names.len() + } + } + +} + +enum NamesIter<'a> { + NamesIterNative(::std::slice::Items<'a, Option<&'static str>>), + NamesIterDynamic(::std::slice::Items<'a, Option<String>>) +} + +impl<'a> Iterator<Option<String>> for NamesIter<'a> { + fn next(&mut self) -> Option<Option<String>> { + match *self { + NamesIterNative(ref mut i) => i.next().map(|x| x.map(|s| s.to_strbuf())), + NamesIterDynamic(ref mut i) => i.next().map(|x| x.as_ref().map(|s| s.to_strbuf())), + } + } } /// NoExpand indicates literal string replacement. @@ -612,6 +660,7 @@ pub struct Captures<'t> { } impl<'t> Captures<'t> { + #[allow(experimental)] fn new(re: &Regex, search: &'t str, locs: CaptureLocs) -> Option<Captures<'t>> { if !has_match(&locs) { @@ -619,15 +668,15 @@ impl<'t> Captures<'t> { } let named = - if re.names.len() == 0 { + if re.names_len() == 0 { None } else { let mut named = HashMap::new(); - for (i, name) in re.names.iter().enumerate() { + for (i, name) in re.names_iter().enumerate() { match name { - &None => {}, - &Some(ref name) => { - named.insert(name.to_strbuf(), i); + None => {}, + Some(name) => { + named.insert(name, i); } } } @@ -862,9 +911,9 @@ fn exec(re: &Regex, which: MatchKind, input: &str) -> CaptureLocs { fn exec_slice(re: &Regex, which: MatchKind, input: &str, s: uint, e: uint) -> CaptureLocs { - match re.p { - Dynamic(ref prog) => vm::run(which, prog, input, s, e), - Native(exec) => exec(which, input, s, e), + match *re { + Dynamic(Dynamic { ref prog, .. }) => vm::run(which, prog, input, s, e), + Native(Native { prog, ..
}) => prog(which, input, s, e), } } diff --git a/src/libregex/test/mod.rs b/src/libregex/test/mod.rs index a4c3a838a2b70..c563c84fc34ed 100644 --- a/src/libregex/test/mod.rs +++ b/src/libregex/test/mod.rs @@ -20,6 +20,9 @@ mod native_bench; #[path = "tests.rs"] mod native_tests; +#[cfg(not(stage1))] +mod native_static; + // Due to macro scoping rules, this definition only applies for the modules // defined below. Effectively, it allows us to use the same tests for both // native and dynamic regexes. diff --git a/src/libregex/test/native_static.rs b/src/libregex/test/native_static.rs new file mode 100644 index 0000000000000..62e14731c207b --- /dev/null +++ b/src/libregex/test/native_static.rs @@ -0,0 +1,26 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use regex::Regex; +static RE: Regex = regex!(r"\d+"); + +#[test] +fn static_splitn() { + let text = "cauchy123plato456tyler789binx"; + let subs: Vec<&str> = RE.splitn(text, 2).collect(); + assert_eq!(subs, vec!("cauchy", "plato456tyler789binx")); +} + +#[test] +fn static_split() { + let text = "cauchy123plato456tyler789binx"; + let subs: Vec<&str> = RE.split(text).collect(); + assert_eq!(subs, vec!("cauchy", "plato", "tyler", "binx")); +} diff --git a/src/libregex_macros/lib.rs b/src/libregex_macros/lib.rs index f27cba415111b..971b727a165a3 100644 --- a/src/libregex_macros/lib.rs +++ b/src/libregex_macros/lib.rs @@ -75,6 +75,7 @@ pub fn macro_registrar(register: |ast::Name, SyntaxExtension|) { /// It is strongly recommended to read the dynamic implementation in vm.rs /// first before trying to understand the code generator.
The implementation /// strategy is identical and vm.rs has comments and will be easier to follow. +#[allow(experimental)] fn native(cx: &mut ExtCtxt, sp: codemap::Span, tts: &[ast::TokenTree]) -> Box<MacResult> { let regex = match parse(cx, tts) { @@ -89,14 +90,14 @@ fn native(cx: &mut ExtCtxt, sp: codemap::Span, tts: &[ast::TokenTree]) return DummyResult::any(sp) } }; - let prog = match re.p { - Dynamic(ref prog) => prog.clone(), + let prog = match re { + Dynamic(Dynamic { ref prog, .. }) => prog.clone(), Native(_) => unreachable!(), }; let mut gen = NfaGen { cx: &*cx, sp: sp, prog: prog, - names: re.names.clone(), original: re.original.clone(), + names: re.names_iter().collect(), original: re.as_str().to_strbuf(), }; MacExpr::new(gen.code()) } @@ -119,7 +120,7 @@ impl<'a> NfaGen<'a> { |cx, name| match *name { Some(ref name) => { let name = name.as_slice(); - quote_expr!(cx, Some($name.to_strbuf())) + quote_expr!(cx, Some($name)) } None => cx.expr_none(self.sp), } @@ -141,9 +142,11 @@ impl<'a> NfaGen<'a> { let regex = self.original.as_slice(); quote_expr!(self.cx, { +static CAP_NAMES: &'static [Option<&'static str>] = &$cap_names; fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str, start: uint, end: uint) -> Vec<Option<uint>> { #![allow(unused_imports)] + #![allow(unused_mut)] use regex::native::{ MatchKind, Exists, Location, Submatches, StepState, StepMatchEarlyReturn, StepMatch, StepContinue, @@ -310,11 +313,11 @@ fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str, } } -::regex::Regex { - original: $regex.to_strbuf(), - names: vec!$cap_names, - p: ::regex::native::Native(exec), -} +::regex::native::Native(::regex::native::Native { + original: $regex, + names: CAP_NAMES, + prog: exec, +}) }) }