Skip to content

Remove regex module from libunicode #24339

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 13, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 7 additions & 44 deletions src/etc/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,33 +305,13 @@ def emit_table(f, name, t_data, t_type = "&'static [(char, char)]", is_pub=True,
format_table_content(f, data, 8)
f.write("\n ];\n\n")

def emit_property_module(f, mod, tbl, emit_fn):
def emit_property_module(f, mod, tbl, emit):
f.write("pub mod %s {\n" % mod)
keys = tbl.keys()
keys.sort()
for cat in keys:
for cat in sorted(emit):
emit_table(f, "%s_table" % cat, tbl[cat])
if cat in emit_fn:
f.write(" pub fn %s(c: char) -> bool {\n" % cat)
f.write(" super::bsearch_range_table(c, %s_table)\n" % cat)
f.write(" }\n\n")
f.write("}\n\n")

def emit_regex_module(f, cats, w_data):
f.write("pub mod regex {\n")
regex_class = "&'static [(char, char)]"
class_table = "&'static [(&'static str, %s)]" % regex_class

emit_table(f, "UNICODE_CLASSES", cats, class_table,
pfun=lambda x: "(\"%s\",super::%s::%s_table)" % (x[0], x[1], x[0]))

f.write(" pub const PERLD: %s = super::general_category::Nd_table;\n\n"
% regex_class)
f.write(" pub const PERLS: %s = super::property::White_Space_table;\n\n"
% regex_class)

emit_table(f, "PERLW", w_data, regex_class)

f.write(" pub fn %s(c: char) -> bool {\n" % cat)
f.write(" super::bsearch_range_table(c, %s_table)\n" % cat)
f.write(" }\n\n")
f.write("}\n\n")

def emit_conversions_module(f, lowerupper, upperlower):
Expand Down Expand Up @@ -605,8 +585,7 @@ def optimize_width_table(wtable):
(canon_decomp, compat_decomp, gencats, combines,
lowerupper, upperlower) = load_unicode_data("UnicodeData.txt")
want_derived = ["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"]
other_derived = ["Default_Ignorable_Code_Point"]
derived = load_properties("DerivedCoreProperties.txt", want_derived + other_derived)
derived = load_properties("DerivedCoreProperties.txt", want_derived)
scripts = load_properties("Scripts.txt", [])
props = load_properties("PropList.txt",
["White_Space", "Join_Control", "Noncharacter_Code_Point"])
Expand All @@ -616,27 +595,11 @@ def optimize_width_table(wtable):
# bsearch_range_table is used in all the property modules below
emit_bsearch_range_table(rf)

# all of these categories will also be available as \p{} in libregex
allcats = []
# category tables
for (name, cat, pfuns) in ("general_category", gencats, ["N", "Cc"]), \
("derived_property", derived, want_derived), \
("script", scripts, []), \
("property", props, ["White_Space"]):
emit_property_module(rf, name, cat, pfuns)
allcats.extend(map(lambda x: (x, name), cat))
allcats.sort(key=lambda c: c[0])

# the \w regex corresponds to Alphabetic + Mark + Decimal_Number +
# Connector_Punctuation + Join-Control according to UTS#18
# http://www.unicode.org/reports/tr18/#Compatibility_Properties
perl_words = []
for cat in derived["Alphabetic"], gencats["M"], gencats["Nd"], \
gencats["Pc"], props["Join_Control"]:
perl_words.extend(ungroup_cat(cat))
perl_words = group_cat(perl_words)

# emit lookup tables for \p{}, along with \d, \w, and \s for libregex
emit_regex_module(rf, allcats, perl_words)

# normalizations and conversions module
emit_norm_module(rf, canon_decomp, compat_decomp, combines, norm_props)
Expand Down
3 changes: 0 additions & 3 deletions src/libunicode/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@

extern crate core;

// regex module
pub use tables::regex;

mod normalize;
mod tables;
mod u_str;
Expand Down
Loading