From 713af75c221a26909d541772a3d6592d633e82a9 Mon Sep 17 00:00:00 2001
From: Florian Zeitz <florob@babelmonkeys.de>
Date: Fri, 28 Feb 2014 20:10:42 +0100
Subject: [PATCH 1/5] Update unicode.py to actually generate unicode.rs

---
 src/etc/unicode.py | 1 +
 1 file changed, 1 insertion(+)
diff --git a/src/etc/unicode.py b/src/etc/unicode.py
index 71c5c8f8a9527..cce302ed972b4 100755
--- a/src/etc/unicode.py
+++ b/src/etc/unicode.py
@@ -376,6 +376,7 @@ def emit_decomp_module(f, canon, compat, combine):
 
 #[allow(missing_doc)];
 #[allow(non_uppercase_statics)];
+#[allow(dead_code)];
 
 ''')
 

From ab05d1ad6ab19650d8b1f24a756e38d36b50bca5 Mon Sep 17 00:00:00 2001
From: Florian Zeitz <florob@babelmonkeys.de>
Date: Sat, 1 Mar 2014 22:36:30 +0100
Subject: [PATCH 2/5] std: Use appropriately sized integers for codepoints and
 bytes

---
 src/libstd/char.rs | 72 ++++++++++++++++++++++------------------------
 1 file changed, 34 insertions(+), 38 deletions(-)

diff --git a/src/libstd/char.rs b/src/libstd/char.rs
index 4c0f77586db44..05a503ecffcc9 100644
--- a/src/libstd/char.rs
+++ b/src/libstd/char.rs
@@ -36,13 +36,14 @@ use unicode::{derived_property, property, general_category, decompose};
 #[cfg(not(test))] use default::Default;
 
 // UTF-8 ranges and tags for encoding characters
-static TAG_CONT: uint = 128u;
-static MAX_ONE_B: uint = 128u;
-static TAG_TWO_B: uint = 192u;
-static MAX_TWO_B: uint = 2048u;
-static TAG_THREE_B: uint = 224u;
-static MAX_THREE_B: uint = 65536u;
-static TAG_FOUR_B: uint = 240u;
+static TAG_CONT: u8 = 128u8;
+static MAX_ONE_B: u32 = 128u32;
+static TAG_TWO_B: u8 = 192u8;
+static MAX_TWO_B: u32 = 2048u32;
+static TAG_THREE_B: u8 = 224u8;
+static MAX_THREE_B: u32 = 65536u32;
+static TAG_FOUR_B: u8 = 240u8;
+static MAX_FOUR_B:  u32 = 2097152u32;
 
 /*
     Lu  Uppercase_Letter        an uppercase letter
@@ -256,37 +257,37 @@ pub fn from_digit(num: uint, radix: uint) -> Option<char> {
 }
 
 // Constants from Unicode 6.2.0 Section 3.12 Conjoining Jamo Behavior
-static S_BASE: uint = 0xAC00;
-static L_BASE: uint = 0x1100;
-static V_BASE: uint = 0x1161;
-static T_BASE: uint = 0x11A7;
-static L_COUNT: uint = 19;
-static V_COUNT: uint = 21;
-static T_COUNT: uint = 28;
-static N_COUNT: uint = (V_COUNT * T_COUNT);
-static S_COUNT: uint = (L_COUNT * N_COUNT);
+static S_BASE: u32 = 0xAC00;
+static L_BASE: u32 = 0x1100;
+static V_BASE: u32 = 0x1161;
+static T_BASE: u32 = 0x11A7;
+static L_COUNT: u32 = 19;
+static V_COUNT: u32 = 21;
+static T_COUNT: u32 = 28;
+static N_COUNT: u32 = (V_COUNT * T_COUNT);
+static S_COUNT: u32 = (L_COUNT * N_COUNT);
 
 // Decompose a precomposed Hangul syllable
 fn decompose_hangul(s: char, f: |char|) {
-    let si = s as uint - S_BASE;
+    let si = s as u32 - S_BASE;
 
     let li = si / N_COUNT;
     unsafe {
-        f(transmute((L_BASE + li) as u32));
+        f(transmute(L_BASE + li));
 
         let vi = (si % N_COUNT) / T_COUNT;
-        f(transmute((V_BASE + vi) as u32));
+        f(transmute(V_BASE + vi));
 
         let ti = si % T_COUNT;
         if ti > 0 {
-            f(transmute((T_BASE + ti) as u32));
+            f(transmute(T_BASE + ti));
         }
     }
 }
 
 /// Returns the canonical decomposition of a character
 pub fn decompose_canonical(c: char, f: |char|) {
-    if (c as uint) < S_BASE || (c as uint) >= (S_BASE + S_COUNT) {
+    if (c as u32) < S_BASE || (c as u32) >= (S_BASE + S_COUNT) {
         decompose::canonical(c, f);
     } else {
         decompose_hangul(c, f);
@@ -295,7 +296,7 @@ pub fn decompose_canonical(c: char, f: |char|) {
 
 /// Returns the compatibility decomposition of a character
 pub fn decompose_compatible(c: char, f: |char|) {
-    if (c as uint) < S_BASE || (c as uint) >= (S_BASE + S_COUNT) {
+    if (c as u32) < S_BASE || (c as u32) >= (S_BASE + S_COUNT) {
         decompose::compatibility(c, f);
     } else {
         decompose_hangul(c, f);
@@ -357,12 +358,7 @@ pub fn escape_default(c: char, f: |char|) {
 
 /// Returns the amount of bytes this `char` would need if encoded in UTF-8
 pub fn len_utf8_bytes(c: char) -> uint {
-    static MAX_ONE_B:   uint = 128u;
-    static MAX_TWO_B:   uint = 2048u;
-    static MAX_THREE_B: uint = 65536u;
-    static MAX_FOUR_B:  uint = 2097152u;
-
-    let code = c as uint;
+    let code = c as u32;
     match () {
         _ if code < MAX_ONE_B   => 1u,
         _ if code < MAX_TWO_B   => 2u,
@@ -430,24 +426,24 @@ impl Char for char {
     fn len_utf8_bytes(&self) -> uint { len_utf8_bytes(*self) }
 
     fn encode_utf8<'a>(&self, dst: &'a mut [u8]) -> uint {
-        let code = *self as uint;
+        let code = *self as u32;
         if code < MAX_ONE_B {
             dst[0] = code as u8;
             return 1;
         } else if code < MAX_TWO_B {
-            dst[0] = (code >> 6u & 31u | TAG_TWO_B) as u8;
-            dst[1] = (code & 63u | TAG_CONT) as u8;
+            dst[0] = (code >> 6u & 31u32) as u8 | TAG_TWO_B;
+            dst[1] = (code & 63u32) as u8 | TAG_CONT;
             return 2;
         } else if code < MAX_THREE_B {
-            dst[0] = (code >> 12u & 15u | TAG_THREE_B) as u8;
-            dst[1] = (code >> 6u & 63u | TAG_CONT) as u8;
-            dst[2] = (code & 63u | TAG_CONT) as u8;
+            dst[0] = (code >> 12u & 15u32) as u8 | TAG_THREE_B;
+            dst[1] = (code >> 6u & 63u32) as u8 | TAG_CONT;
+            dst[2] = (code & 63u32) as u8 | TAG_CONT;
             return 3;
         } else {
-            dst[0] = (code >> 18u & 7u | TAG_FOUR_B) as u8;
-            dst[1] = (code >> 12u & 63u | TAG_CONT) as u8;
-            dst[2] = (code >> 6u & 63u | TAG_CONT) as u8;
-            dst[3] = (code & 63u | TAG_CONT) as u8;
+            dst[0] = (code >> 18u & 7u32) as u8 | TAG_FOUR_B;
+            dst[1] = (code >> 12u & 63u32) as u8 | TAG_CONT;
+            dst[2] = (code >> 6u & 63u32) as u8 | TAG_CONT;
+            dst[3] = (code & 63u32) as u8 | TAG_CONT;
             return 4;
         }
     }

From bb587d9791133645eccb118341dee0798996fb50 Mon Sep 17 00:00:00 2001
From: Florian Zeitz <florob@babelmonkeys.de>
Date: Thu, 6 Mar 2014 04:41:43 +0100
Subject: [PATCH 3/5] std: Move Hangul decomposition into unicode.rs

---
 src/etc/unicode.py    | 86 +++++++++++++++++++++++++++++++------------
 src/libstd/char.rs    | 54 +++------------------------
 src/libstd/unicode.rs | 48 +++++++++++++++++++++++-
 3 files changed, 115 insertions(+), 73 deletions(-)

diff --git a/src/etc/unicode.py b/src/etc/unicode.py
index cce302ed972b4..7b93e10b26549 100755
--- a/src/etc/unicode.py
+++ b/src/etc/unicode.py
@@ -309,20 +309,28 @@ def emit_decomp_module(f, canon, compat, combine):
         ix += 1
     f.write("\n    ];\n")
 
-    f.write("    pub fn canonical(c: char, i: |char|) "
-        + "{ d(c, i, false); }\n\n")
-    f.write("    pub fn compatibility(c: char, i: |char|) "
-            +"{ d(c, i, true); }\n\n")
-    f.write("    pub fn canonical_combining_class(c: char) -> u8 {\n"
-        + "        bsearch_range_value_table(c, combining_class_table)\n"
-        + "    }\n\n")
-    f.write("    fn d(c: char, i: |char|, k: bool) {\n")
-    f.write("        use iter::Iterator;\n");
-
-    f.write("        if c <= '\\x7f' { i(c); return; }\n")
-
-    # First check the canonical decompositions
     f.write("""
+    pub fn decompose_canonical(c: char, i: |char|) { d(c, i, false); }
+
+    pub fn decompose_compatible(c: char, i: |char|) { d(c, i, true); }
+
+    pub fn canonical_combining_class(c: char) -> u8 {
+        bsearch_range_value_table(c, combining_class_table)
+    }
+
+    fn d(c: char, i: |char|, k: bool) {
+        use iter::Iterator;
+
+        // 7-bit ASCII never decomposes
+        if c <= '\\x7f' { i(c); return; }
+
+        // Perform decomposition for Hangul
+        if (c as u32) >= S_BASE && (c as u32) < (S_BASE + S_COUNT) {
+            decompose_hangul(c, i);
+            return;
+        }
+
+        // First check the canonical decompositions
         match bsearch_table(c, canonical_table) {
             Some(canon) => {
                 for x in canon.iter() {
@@ -331,13 +339,12 @@ def emit_decomp_module(f, canon, compat, combine):
                 return;
             }
             None => ()
-        }\n\n""")
+        }
 
-    # Bottom out if we're not doing compat.
-    f.write("        if !k { i(c); return; }\n")
+        // Bottom out if we're not doing compat.
+        if !k { i(c); return; }
 
-    # Then check the compatibility decompositions
-    f.write("""
+        // Then check the compatibility decompositions
         match bsearch_table(c, compatibility_table) {
             Some(compat) => {
                 for x in compat.iter() {
@@ -346,12 +353,45 @@ def emit_decomp_module(f, canon, compat, combine):
                 return;
             }
             None => ()
-        }\n\n""")
+        }
 
-    # Finally bottom out.
-    f.write("        i(c);\n")
-    f.write("    }\n")
-    f.write("}\n\n")
+        // Finally bottom out.
+        i(c);
+    }
+
+    // Constants from Unicode 6.2.0 Section 3.12 Conjoining Jamo Behavior
+    static S_BASE: u32 = 0xAC00;
+    static L_BASE: u32 = 0x1100;
+    static V_BASE: u32 = 0x1161;
+    static T_BASE: u32 = 0x11A7;
+    static L_COUNT: u32 = 19;
+    static V_COUNT: u32 = 21;
+    static T_COUNT: u32 = 28;
+    static N_COUNT: u32 = (V_COUNT * T_COUNT);
+    static S_COUNT: u32 = (L_COUNT * N_COUNT);
+
+    // Decompose a precomposed Hangul syllable
+    fn decompose_hangul(s: char, f: |char|) {
+        use cast::transmute;
+
+        let si = s as u32 - S_BASE;
+
+        let li = si / N_COUNT;
+        unsafe {
+            f(transmute(L_BASE + li));
+
+            let vi = (si % N_COUNT) / T_COUNT;
+            f(transmute(V_BASE + vi));
+
+            let ti = si % T_COUNT;
+            if ti > 0 {
+                f(transmute(T_BASE + ti));
+            }
+        }
+    }
+}
+
+""")
 
 r = "unicode.rs"
 for i in [r]:
diff --git a/src/libstd/char.rs b/src/libstd/char.rs
index 05a503ecffcc9..c731b922eb4eb 100644
--- a/src/libstd/char.rs
+++ b/src/libstd/char.rs
@@ -28,7 +28,12 @@ use cast::transmute;
 use option::{None, Option, Some};
 use iter::{Iterator, range_step};
 use str::StrSlice;
-use unicode::{derived_property, property, general_category, decompose};
+use unicode::{derived_property, property, general_category};
+
+/// Returns the canonical decomposition of a character.
+pub use unicode::normalization::decompose_canonical;
+/// Returns the compatibility decomposition of a character.
+pub use unicode::normalization::decompose_compatible;
 
 #[cfg(test)] use str::OwnedStr;
 
@@ -256,53 +261,6 @@ pub fn from_digit(num: uint, radix: uint) -> Option<char> {
     }
 }
 
-// Constants from Unicode 6.2.0 Section 3.12 Conjoining Jamo Behavior
-static S_BASE: u32 = 0xAC00;
-static L_BASE: u32 = 0x1100;
-static V_BASE: u32 = 0x1161;
-static T_BASE: u32 = 0x11A7;
-static L_COUNT: u32 = 19;
-static V_COUNT: u32 = 21;
-static T_COUNT: u32 = 28;
-static N_COUNT: u32 = (V_COUNT * T_COUNT);
-static S_COUNT: u32 = (L_COUNT * N_COUNT);
-
-// Decompose a precomposed Hangul syllable
-fn decompose_hangul(s: char, f: |char|) {
-    let si = s as u32 - S_BASE;
-
-    let li = si / N_COUNT;
-    unsafe {
-        f(transmute(L_BASE + li));
-
-        let vi = (si % N_COUNT) / T_COUNT;
-        f(transmute(V_BASE + vi));
-
-        let ti = si % T_COUNT;
-        if ti > 0 {
-            f(transmute(T_BASE + ti));
-        }
-    }
-}
-
-/// Returns the canonical decomposition of a character
-pub fn decompose_canonical(c: char, f: |char|) {
-    if (c as u32) < S_BASE || (c as u32) >= (S_BASE + S_COUNT) {
-        decompose::canonical(c, f);
-    } else {
-        decompose_hangul(c, f);
-    }
-}
-
-/// Returns the compatibility decomposition of a character
-pub fn decompose_compatible(c: char, f: |char|) {
-    if (c as u32) < S_BASE || (c as u32) >= (S_BASE + S_COUNT) {
-        decompose::compatibility(c, f);
-    } else {
-        decompose_hangul(c, f);
-    }
-}
-
 ///
 /// Returns the hexadecimal Unicode escape of a `char`
 ///
diff --git a/src/libstd/unicode.rs b/src/libstd/unicode.rs
index 144500fac5d96..866580ce9c986 100644
--- a/src/libstd/unicode.rs
+++ b/src/libstd/unicode.rs
@@ -3624,9 +3624,10 @@ pub mod decompose {
         ('\U0001d185', '\U0001d189', 230), ('\U0001d18a', '\U0001d18b', 220),
         ('\U0001d1aa', '\U0001d1ad', 230), ('\U0001d242', '\U0001d244', 230)
     ];
-    pub fn canonical(c: char, i: |char|) { d(c, i, false); }
 
-    pub fn compatibility(c: char, i: |char|) { d(c, i, true); }
+    pub fn decompose_canonical(c: char, i: |char|) { d(c, i, false); }
+
+    pub fn decompose_compatible(c: char, i: |char|) { d(c, i, true); }
 
     pub fn canonical_combining_class(c: char) -> u8 {
         bsearch_range_value_table(c, combining_class_table)
@@ -3634,8 +3635,17 @@ pub mod decompose {
 
     fn d(c: char, i: |char|, k: bool) {
         use iter::Iterator;
+
+        // 7-bit ASCII never decomposes
         if c <= '\x7f' { i(c); return; }
 
+        // Perform decomposition for Hangul
+        if (c as u32) >= S_BASE && (c as u32) < (S_BASE + S_COUNT) {
+            decompose_hangul(c, i);
+            return;
+        }
+
+        // First check the canonical decompositions
         match bsearch_table(c, canonical_table) {
             Some(canon) => {
                 for x in canon.iter() {
@@ -3646,8 +3656,10 @@ pub mod decompose {
             None => ()
         }
 
+        // Bottom out if we're not doing compat.
         if !k { i(c); return; }
 
+        // Then check the compatibility decompositions
         match bsearch_table(c, compatibility_table) {
             Some(compat) => {
                 for x in compat.iter() {
@@ -3658,8 +3670,40 @@ pub mod decompose {
             None => ()
         }
 
+        // Finally bottom out.
         i(c);
     }
+
+    // Constants from Unicode 6.2.0 Section 3.12 Conjoining Jamo Behavior
+    static S_BASE: u32 = 0xAC00;
+    static L_BASE: u32 = 0x1100;
+    static V_BASE: u32 = 0x1161;
+    static T_BASE: u32 = 0x11A7;
+    static L_COUNT: u32 = 19;
+    static V_COUNT: u32 = 21;
+    static T_COUNT: u32 = 28;
+    static N_COUNT: u32 = (V_COUNT * T_COUNT);
+    static S_COUNT: u32 = (L_COUNT * N_COUNT);
+
+    // Decompose a precomposed Hangul syllable
+    fn decompose_hangul(s: char, f: |char|) {
+        use cast::transmute;
+
+        let si = s as u32 - S_BASE;
+
+        let li = si / N_COUNT;
+        unsafe {
+            f(transmute(L_BASE + li));
+
+            let vi = (si % N_COUNT) / T_COUNT;
+            f(transmute(V_BASE + vi));
+
+            let ti = si % T_COUNT;
+            if ti > 0 {
+                f(transmute(T_BASE + ti));
+            }
+        }
+    }
 }
 
 pub mod derived_property {

From f41ecef1500dd15a88e7995617951c8b74b0ab5a Mon Sep 17 00:00:00 2001
From: Florian Zeitz <florob@babelmonkeys.de>
Date: Fri, 7 Mar 2014 03:39:06 +0100
Subject: [PATCH 4/5] std: Rename str::Normalizations to str::Decompositions

---
 src/etc/unicode.py    |  2 +-
 src/libstd/str.rs     | 36 ++++++++++++++++++------------------
 src/libstd/unicode.rs |  2 +-
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/etc/unicode.py b/src/etc/unicode.py
index 7b93e10b26549..6e058c37a01a1 100755
--- a/src/etc/unicode.py
+++ b/src/etc/unicode.py
@@ -225,7 +225,7 @@ def emit_decomp_module(f, canon, compat, combine):
 
     compat_keys = compat.keys()
     compat_keys.sort()
-    f.write("pub mod decompose {\n");
+    f.write("pub mod normalization {\n");
     f.write("    use option::Option;\n");
     f.write("    use option::{Some, None};\n");
     f.write("    use vec::ImmutableVector;\n");
diff --git a/src/libstd/str.rs b/src/libstd/str.rs
index 3464c4a1128e3..ce2c7e8af88f9 100644
--- a/src/libstd/str.rs
+++ b/src/libstd/str.rs
@@ -587,25 +587,25 @@ fn canonical_sort(comb: &mut [(char, u8)]) {
 }
 
 #[deriving(Clone)]
-enum NormalizationForm {
-    NFD,
-    NFKD
+enum DecompositionType {
+    Canonical,
+    Compatible
 }
 
-/// External iterator for a string's normalization's characters.
+/// External iterator for a string's decomposition's characters.
 /// Use with the `std::iter` module.
 #[deriving(Clone)]
-pub struct Normalizations<'a> {
-    priv kind: NormalizationForm,
+pub struct Decompositions<'a> {
+    priv kind: DecompositionType,
     priv iter: Chars<'a>,
     priv buffer: ~[(char, u8)],
     priv sorted: bool
 }
 
-impl<'a> Iterator<char> for Normalizations<'a> {
+impl<'a> Iterator<char> for Decompositions<'a> {
     #[inline]
     fn next(&mut self) -> Option<char> {
-        use unicode::decompose::canonical_combining_class;
+        use unicode::normalization::canonical_combining_class;
 
         match self.buffer.head() {
             Some(&(c, 0)) => {
@@ -621,8 +621,8 @@ impl<'a> Iterator<char> for Normalizations<'a> {
         }
 
         let decomposer = match self.kind {
-            NFD => char::decompose_canonical,
-            NFKD => char::decompose_compatible
+            Canonical => char::decompose_canonical,
+            Compatible => char::decompose_compatible
         };
 
         if !self.sorted {
@@ -1858,11 +1858,11 @@ pub trait StrSlice<'a> {
 
     /// An Iterator over the string in Unicode Normalization Form D
     /// (canonical decomposition).
-    fn nfd_chars(&self) -> Normalizations<'a>;
+    fn nfd_chars(&self) -> Decompositions<'a>;
 
     /// An Iterator over the string in Unicode Normalization Form KD
     /// (compatibility decomposition).
-    fn nfkd_chars(&self) -> Normalizations<'a>;
+    fn nfkd_chars(&self) -> Decompositions<'a>;
 
     /// Returns true if the string contains only whitespace.
     ///
@@ -2444,22 +2444,22 @@ impl<'a> StrSlice<'a> for &'a str {
     }
 
     #[inline]
-    fn nfd_chars(&self) -> Normalizations<'a> {
-        Normalizations {
+    fn nfd_chars(&self) -> Decompositions<'a> {
+        Decompositions {
             iter: self.chars(),
             buffer: ~[],
             sorted: false,
-            kind: NFD
+            kind: Canonical
         }
     }
 
     #[inline]
-    fn nfkd_chars(&self) -> Normalizations<'a> {
-        Normalizations {
+    fn nfkd_chars(&self) -> Decompositions<'a> {
+        Decompositions {
             iter: self.chars(),
             buffer: ~[],
             sorted: false,
-            kind: NFKD
+            kind: Compatible
         }
     }
 
diff --git a/src/libstd/unicode.rs b/src/libstd/unicode.rs
index 866580ce9c986..1edc26e21e207 100644
--- a/src/libstd/unicode.rs
+++ b/src/libstd/unicode.rs
@@ -1445,7 +1445,7 @@ pub mod general_category {
     }
 
 }
-pub mod decompose {
+pub mod normalization {
     use option::Option;
     use option::{Some, None};
     use vec::ImmutableVector;

From 8e444f289c5c71f7c891f8e01f74e06e8a89e557 Mon Sep 17 00:00:00 2001
From: Florian Zeitz <florob@babelmonkeys.de>
Date: Sun, 9 Mar 2014 22:20:24 +0100
Subject: [PATCH 5/5] std: Add support for NFC and NFKC

---
 src/etc/unicode.py    |  82 +++++++++-
 src/libstd/str.rs     | 169 +++++++++++++++++++-
 src/libstd/unicode.rs | 357 +++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 600 insertions(+), 8 deletions(-)

diff --git a/src/etc/unicode.py b/src/etc/unicode.py
index 6e058c37a01a1..eac56192b6e46 100755
--- a/src/etc/unicode.py
+++ b/src/etc/unicode.py
@@ -219,7 +219,7 @@ def format_table_content(f, content, indent):
             line = " "*indent + chunk
     f.write(line)
 
-def emit_decomp_module(f, canon, compat, combine):
+def emit_norm_module(f, canon, compat, combine, norm_props):
     canon_keys = canon.keys()
     canon_keys.sort()
 
@@ -230,7 +230,7 @@ def emit_decomp_module(f, canon, compat, combine):
     f.write("    use option::{Some, None};\n");
     f.write("    use vec::ImmutableVector;\n");
     f.write("""
-    fn bsearch_table(c: char, r: &'static [(char, &'static [char])]) -> Option<&'static [char]> {
+    fn bsearch_table<T>(c: char, r: &'static [(char, &'static [T])]) -> Option<&'static [T]> {
         use cmp::{Equal, Less, Greater};
         match r.bsearch(|&(val, _)| {
             if c == val { Equal }
@@ -301,6 +301,39 @@ def emit_decomp_module(f, canon, compat, combine):
     format_table_content(f, data, 8)
     f.write("\n    ];\n\n")
 
+
+    canon_comp = {}
+    comp_exclusions = norm_props["Full_Composition_Exclusion"]
+    for char in canon_keys:
+        if True in map(lambda (lo, hi): lo <= char <= hi, comp_exclusions):
+            continue
+        decomp = canon[char]
+        if len(decomp) == 2:
+            if not canon_comp.has_key(decomp[0]):
+                canon_comp[decomp[0]] = []
+            canon_comp[decomp[0]].append( (decomp[1], char) )
+    canon_comp_keys = canon_comp.keys()
+    canon_comp_keys.sort()
+    f.write("    static composition_table : &'static [(char, &'static [(char, char)])] = &[\n")
+    data = ""
+    first = True
+    for char in canon_comp_keys:
+        if not first:
+            data += ","
+        first = False
+        data += "(%s, &[" % escape_char(char)
+        canon_comp[char].sort(lambda x, y: x[0] - y[0])
+        first2 = True
+        for pair in canon_comp[char]:
+            if not first2:
+                data += ","
+            first2 = False
+            data += "(%s, %s)" % (escape_char(pair[0]), escape_char(pair[1]))
+        data += "])"
+    format_table_content(f, data, 8)
+    f.write("\n    ];\n\n")
+
+
     f.write("    static combining_class_table : &'static [(char, char, u8)] = &[\n")
     ix = 0
     for pair in combine:
@@ -314,6 +347,28 @@ def emit_decomp_module(f, canon, compat, combine):
 
     pub fn decompose_compatible(c: char, i: |char|) { d(c, i, true); }
 
+    pub fn compose(a: char, b: char) -> Option<char> {
+        use cmp::{Equal, Less, Greater};
+        compose_hangul(a, b).or_else(|| {
+            match bsearch_table(a, composition_table) {
+                None => None,
+                Some(candidates) => {
+                    match candidates.bsearch(|&(val, _)| {
+                        if b == val { Equal }
+                        else if val < b { Less }
+                        else { Greater }
+                    }) {
+                        Some(idx) => {
+                            let (_, result) = candidates[idx];
+                            Some(result)
+                        }
+                        None => None
+                    }
+                }
+            }
+        })
+    }
+
     pub fn canonical_combining_class(c: char) -> u8 {
         bsearch_range_value_table(c, combining_class_table)
     }
@@ -371,6 +426,7 @@ def emit_decomp_module(f, canon, compat, combine):
     static S_COUNT: u32 = (L_COUNT * N_COUNT);
 
     // Decompose a precomposed Hangul syllable
+    #[inline(always)]
     fn decompose_hangul(s: char, f: |char|) {
         use cast::transmute;
 
@@ -389,6 +445,25 @@ def emit_decomp_module(f, canon, compat, combine):
             }
         }
     }
+
+    // Compose a Hangul pair: L + V -> LV syllable, or LV syllable + T -> LVT syllable
+    #[inline(always)]
+    fn compose_hangul(a: char, b: char) -> Option<char> {
+        use cast::transmute;
+        let l = a as u32;
+        let v = b as u32;
+        // Compose an LPart and a VPart
+        if L_BASE <= l && l < (L_BASE + L_COUNT) && V_BASE <= v && v < (V_BASE + V_COUNT) {
+            let r = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT;
+            unsafe { return Some(transmute(r)); }
+        }
+        // Compose an LVPart and a TPart: `a` must not already end in a T, and T_BASE is no T
+        let lv = S_BASE <= l && l < (S_BASE + S_COUNT) && (l - S_BASE) % T_COUNT == 0;
+        if lv && T_BASE < v && v < (T_BASE + T_COUNT) {
+            unsafe { return Some(transmute(l + (v - T_BASE))); }
+        }
+        None
+    }
 }
 
 """)
@@ -422,7 +497,8 @@ def emit_decomp_module(f, canon, compat, combine):
 
 emit_property_module(rf, "general_category", gencats)
 
-emit_decomp_module(rf, canon_decomp, compat_decomp, combines)
+norm_props = load_properties("DerivedNormalizationProps.txt", ["Full_Composition_Exclusion"])
+emit_norm_module(rf, canon_decomp, compat_decomp, combines, norm_props)
 
 derived = load_properties("DerivedCoreProperties.txt",
         ["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"])
diff --git a/src/libstd/str.rs b/src/libstd/str.rs
index ce2c7e8af88f9..8abc8e90936aa 100644
--- a/src/libstd/str.rs
+++ b/src/libstd/str.rs
@@ -99,6 +99,7 @@ use option::{None, Option, Some};
 use ptr;
 use ptr::RawPtr;
 use from_str::FromStr;
+use unicode::normalization::{canonical_combining_class, compose};
 use vec;
 use vec::{OwnedVector, OwnedCloneableVector, ImmutableVector, MutableVector};
 use vec_ng::Vec;
@@ -575,8 +576,8 @@ fn canonical_sort(comb: &mut [(char, u8)]) {
     for i in range(0, len) {
         let mut swapped = false;
         for j in range(1, len-i) {
-            let class_a = *comb[j-1].ref1();
-            let class_b = *comb[j].ref1();
+            let class_a = comb[j-1].val1();
+            let class_b = comb[j].val1();
             if class_a != 0 && class_b != 0 && class_a > class_b {
                 comb.swap(j-1, j);
                 swapped = true;
@@ -605,8 +606,6 @@ pub struct Decompositions<'a> {
 impl<'a> Iterator<char> for Decompositions<'a> {
     #[inline]
     fn next(&mut self) -> Option<char> {
-        use unicode::normalization::canonical_combining_class;
-
         match self.buffer.head() {
             Some(&(c, 0)) => {
                 self.sorted = false;
@@ -662,6 +661,106 @@ impl<'a> Iterator<char> for Decompositions<'a> {
     }
 }
 
+#[deriving(Clone)]
+enum RecompositionState {
+    Composing, // consuming `iter` and composing chars onto `composee`
+    Purging,   // emitting buffered chars before composing resumes
+    Finished   // `iter` is exhausted; emitting what is left
+}
+
+/// External iterator over the characters of a string's recomposition.
+/// Use with the `std::iter` module.
+#[deriving(Clone)]
+pub struct Recompositions<'a> {
+    priv iter: Decompositions<'a>,  // source of decomposed chars
+    priv state: RecompositionState, // which phase `next()` is in
+    priv buffer: ~[char],           // chars held back until `composee` is emitted
+    priv composee: Option<char>,    // char that following chars are composed onto
+    priv last_ccc: Option<u8>       // combining class of the last char pushed to `buffer`
+}
+
+impl<'a> Iterator<char> for Recompositions<'a> {
+    #[inline]
+    fn next(&mut self) -> Option<char> { // driven by `self.state`
+        loop {
+            match self.state {
+                Composing => {
+                    for ch in self.iter { // `iter` yields the decomposed chars
+                        let ch_class = canonical_combining_class(ch);
+                        if self.composee.is_none() {
+                            if ch_class != 0 { // no composee to attach to: pass through
+                                return Some(ch);
+                            }
+                            self.composee = Some(ch); // class-0 char becomes the composee
+                            continue;
+                        }
+                        let k = self.composee.clone().unwrap();
+
+                        match self.last_ccc {
+                            None => { // nothing buffered for this composee yet
+                                match compose(k, ch) {
+                                    Some(r) => {
+                                        self.composee = Some(r);
+                                        continue;
+                                    }
+                                    None => {
+                                        if ch_class == 0 { // emit `k`, restart from `ch`
+                                            self.composee = Some(ch);
+                                            return Some(k);
+                                        }
+                                        self.buffer.push(ch);
+                                        self.last_ccc = Some(ch_class);
+                                    }
+                                }
+                            }
+                            Some(l_class) => {
+                                if l_class >= ch_class {
+                                    // `ch` is blocked from `composee`
+                                    if ch_class == 0 { // emit `k`, then drain `buffer`
+                                        self.composee = Some(ch);
+                                        self.last_ccc = None;
+                                        self.state = Purging;
+                                        return Some(k);
+                                    }
+                                    self.buffer.push(ch);
+                                    self.last_ccc = Some(ch_class);
+                                    continue;
+                                }
+                                match compose(k, ch) {
+                                    Some(r) => {
+                                        self.composee = Some(r);
+                                        continue;
+                                    }
+                                    None => {
+                                        self.buffer.push(ch);
+                                        self.last_ccc = Some(ch_class);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    self.state = Finished; // `iter` is exhausted
+                    if self.composee.is_some() {
+                        return self.composee.take();
+                    }
+                }
+                Purging => { // drain `buffer`, then go back to composing
+                    match self.buffer.shift() {
+                        None => self.state = Composing,
+                        s => return s
+                    }
+                }
+                Finished => { // drain `buffer`, then whatever is left in `composee`
+                    match self.buffer.shift() {
+                        None => return self.composee.take(),
+                        s => return s
+                    }
+                }
+            }
+        }
+    }
+}
+
 /// Replace all occurrences of one string with another
 ///
 /// # Arguments
@@ -1864,6 +1963,14 @@ pub trait StrSlice<'a> {
     /// (compatibility decomposition).
     fn nfkd_chars(&self) -> Decompositions<'a>;
 
+    /// An Iterator over the string in Unicode Normalization Form C
+    /// (canonical decomposition followed by canonical composition).
+    fn nfc_chars(&self) -> Recompositions<'a>;
+
+    /// An Iterator over the string in Unicode Normalization Form KC
+    /// (compatibility decomposition followed by canonical composition).
+    fn nfkc_chars(&self) -> Recompositions<'a>;
+
     /// Returns true if the string contains only whitespace.
     ///
     /// Whitespace characters are determined by `char::is_whitespace`.
@@ -2463,6 +2570,28 @@ impl<'a> StrSlice<'a> for &'a str {
         }
     }
 
+    #[inline]
+    fn nfc_chars(&self) -> Recompositions<'a> {
+        Recompositions {
+            iter: self.nfd_chars(), // recompose the canonical decomposition
+            state: Composing,
+            buffer: ~[],
+            composee: None,
+            last_ccc: None
+        }
+    }
+
+    #[inline]
+    fn nfkc_chars(&self) -> Recompositions<'a> {
+        Recompositions {
+            iter: self.nfkd_chars(), // recompose the compatibility decomposition
+            state: Composing,
+            buffer: ~[],
+            composee: None,
+            last_ccc: None
+        }
+    }
+
     #[inline]
     fn is_whitespace(&self) -> bool { self.chars().all(char::is_whitespace) }
 
@@ -4257,6 +4386,38 @@ mod tests {
         assert_eq!("\uac1c".nfkd_chars().collect::<~str>(), ~"\u1100\u1162");
     }
 
+    #[test]
+    fn test_nfc_chars() {
+        let nfc = |s: &str| s.nfc_chars().collect::<~str>(); // normalize into an owned string
+        assert_eq!(nfc("abc"), ~"abc");
+        assert_eq!(nfc("\u1e0b\u01c4"), ~"\u1e0b\u01c4");
+        assert_eq!(nfc("\u2026"), ~"\u2026");
+        assert_eq!(nfc("\u2126"), ~"\u03a9");
+        assert_eq!(nfc("\u1e0b\u0323"), ~"\u1e0d\u0307");
+        assert_eq!(nfc("\u1e0d\u0307"), ~"\u1e0d\u0307");
+        assert_eq!(nfc("a\u0301"), ~"\xe1");
+        assert_eq!(nfc("\u0301a"), ~"\u0301a");
+        assert_eq!(nfc("\ud4db"), ~"\ud4db");
+        assert_eq!(nfc("\uac1c"), ~"\uac1c");
+        assert_eq!(nfc("a\u0300\u0305\u0315\u05aeb"), ~"\xe0\u05ae\u0305\u0315b");
+    }
+
+    #[test]
+    fn test_nfkc_chars() {
+        let nfkc = |s: &str| s.nfkc_chars().collect::<~str>(); // normalize into an owned string
+        assert_eq!(nfkc("abc"), ~"abc");
+        assert_eq!(nfkc("\u1e0b\u01c4"), ~"\u1e0bD\u017d");
+        assert_eq!(nfkc("\u2026"), ~"...");
+        assert_eq!(nfkc("\u2126"), ~"\u03a9");
+        assert_eq!(nfkc("\u1e0b\u0323"), ~"\u1e0d\u0307");
+        assert_eq!(nfkc("\u1e0d\u0307"), ~"\u1e0d\u0307");
+        assert_eq!(nfkc("a\u0301"), ~"\xe1");
+        assert_eq!(nfkc("\u0301a"), ~"\u0301a");
+        assert_eq!(nfkc("\ud4db"), ~"\ud4db");
+        assert_eq!(nfkc("\uac1c"), ~"\uac1c");
+        assert_eq!(nfkc("a\u0300\u0305\u0315\u05aeb"), ~"\xe0\u05ae\u0305\u0315b");
+    }
+
     #[test]
     fn test_lines() {
         let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
diff --git a/src/libstd/unicode.rs b/src/libstd/unicode.rs
index 1edc26e21e207..4e34ca59f5c88 100644
--- a/src/libstd/unicode.rs
+++ b/src/libstd/unicode.rs
@@ -1450,7 +1450,7 @@ pub mod normalization {
     use option::{Some, None};
     use vec::ImmutableVector;
 
-    fn bsearch_table(c: char, r: &'static [(char, &'static [char])]) -> Option<&'static [char]> {
+    fn bsearch_table<T>(c: char, r: &'static [(char, &'static [T])]) -> Option<&'static [T]> {
         use cmp::{Equal, Less, Greater};
         match r.bsearch(|&(val, _)| {
             if c == val { Equal }
@@ -3478,6 +3478,319 @@ pub mod normalization {
         &['\u53ef'])
     ];
 
+    static composition_table : &'static [(char, &'static [(char, char)])] = &[
+        ('\x3c',  &[('\u0338',  '\u226e')]), ('\x3d',  &[('\u0338',  '\u2260')]), ('\x3e',
+         &[('\u0338',  '\u226f')]), ('\x41',  &[('\u0300',  '\xc0'), ('\u0301',  '\xc1'), ('\u0302',
+         '\xc2'), ('\u0303',  '\xc3'), ('\u0304',  '\u0100'), ('\u0306',  '\u0102'), ('\u0307',
+         '\u0226'), ('\u0308',  '\xc4'), ('\u0309',  '\u1ea2'), ('\u030a',  '\xc5'), ('\u030c',
+         '\u01cd'), ('\u030f',  '\u0200'), ('\u0311',  '\u0202'), ('\u0323',  '\u1ea0'), ('\u0325',
+         '\u1e00'), ('\u0328',  '\u0104')]), ('\x42',  &[('\u0307',  '\u1e02'), ('\u0323',
+         '\u1e04'), ('\u0331',  '\u1e06')]), ('\x43',  &[('\u0301',  '\u0106'), ('\u0302',
+         '\u0108'), ('\u0307',  '\u010a'), ('\u030c',  '\u010c'), ('\u0327',  '\xc7')]), ('\x44',
+         &[('\u0307',  '\u1e0a'), ('\u030c',  '\u010e'), ('\u0323',  '\u1e0c'), ('\u0327',
+         '\u1e10'), ('\u032d',  '\u1e12'), ('\u0331',  '\u1e0e')]), ('\x45',  &[('\u0300',  '\xc8'),
+        ('\u0301',  '\xc9'), ('\u0302',  '\xca'), ('\u0303',  '\u1ebc'), ('\u0304',  '\u0112'),
+        ('\u0306',  '\u0114'), ('\u0307',  '\u0116'), ('\u0308',  '\xcb'), ('\u0309',  '\u1eba'),
+        ('\u030c',  '\u011a'), ('\u030f',  '\u0204'), ('\u0311',  '\u0206'), ('\u0323',  '\u1eb8'),
+        ('\u0327',  '\u0228'), ('\u0328',  '\u0118'), ('\u032d',  '\u1e18'), ('\u0330',
+         '\u1e1a')]), ('\x46',  &[('\u0307',  '\u1e1e')]), ('\x47',  &[('\u0301',  '\u01f4'),
+        ('\u0302',  '\u011c'), ('\u0304',  '\u1e20'), ('\u0306',  '\u011e'), ('\u0307',  '\u0120'),
+        ('\u030c',  '\u01e6'), ('\u0327',  '\u0122')]), ('\x48',  &[('\u0302',  '\u0124'),
+        ('\u0307',  '\u1e22'), ('\u0308',  '\u1e26'), ('\u030c',  '\u021e'), ('\u0323',  '\u1e24'),
+        ('\u0327',  '\u1e28'), ('\u032e',  '\u1e2a')]), ('\x49',  &[('\u0300',  '\xcc'), ('\u0301',
+         '\xcd'), ('\u0302',  '\xce'), ('\u0303',  '\u0128'), ('\u0304',  '\u012a'), ('\u0306',
+         '\u012c'), ('\u0307',  '\u0130'), ('\u0308',  '\xcf'), ('\u0309',  '\u1ec8'), ('\u030c',
+         '\u01cf'), ('\u030f',  '\u0208'), ('\u0311',  '\u020a'), ('\u0323',  '\u1eca'), ('\u0328',
+         '\u012e'), ('\u0330',  '\u1e2c')]), ('\x4a',  &[('\u0302',  '\u0134')]), ('\x4b',
+         &[('\u0301',  '\u1e30'), ('\u030c',  '\u01e8'), ('\u0323',  '\u1e32'), ('\u0327',
+         '\u0136'), ('\u0331',  '\u1e34')]), ('\x4c',  &[('\u0301',  '\u0139'), ('\u030c',
+         '\u013d'), ('\u0323',  '\u1e36'), ('\u0327',  '\u013b'), ('\u032d',  '\u1e3c'), ('\u0331',
+         '\u1e3a')]), ('\x4d',  &[('\u0301',  '\u1e3e'), ('\u0307',  '\u1e40'), ('\u0323',
+         '\u1e42')]), ('\x4e',  &[('\u0300',  '\u01f8'), ('\u0301',  '\u0143'), ('\u0303',  '\xd1'),
+        ('\u0307',  '\u1e44'), ('\u030c',  '\u0147'), ('\u0323',  '\u1e46'), ('\u0327',  '\u0145'),
+        ('\u032d',  '\u1e4a'), ('\u0331',  '\u1e48')]), ('\x4f',  &[('\u0300',  '\xd2'), ('\u0301',
+         '\xd3'), ('\u0302',  '\xd4'), ('\u0303',  '\xd5'), ('\u0304',  '\u014c'), ('\u0306',
+         '\u014e'), ('\u0307',  '\u022e'), ('\u0308',  '\xd6'), ('\u0309',  '\u1ece'), ('\u030b',
+         '\u0150'), ('\u030c',  '\u01d1'), ('\u030f',  '\u020c'), ('\u0311',  '\u020e'), ('\u031b',
+         '\u01a0'), ('\u0323',  '\u1ecc'), ('\u0328',  '\u01ea')]), ('\x50',  &[('\u0301',
+         '\u1e54'), ('\u0307',  '\u1e56')]), ('\x52',  &[('\u0301',  '\u0154'), ('\u0307',
+         '\u1e58'), ('\u030c',  '\u0158'), ('\u030f',  '\u0210'), ('\u0311',  '\u0212'), ('\u0323',
+         '\u1e5a'), ('\u0327',  '\u0156'), ('\u0331',  '\u1e5e')]), ('\x53',  &[('\u0301',
+         '\u015a'), ('\u0302',  '\u015c'), ('\u0307',  '\u1e60'), ('\u030c',  '\u0160'), ('\u0323',
+         '\u1e62'), ('\u0326',  '\u0218'), ('\u0327',  '\u015e')]), ('\x54',  &[('\u0307',
+         '\u1e6a'), ('\u030c',  '\u0164'), ('\u0323',  '\u1e6c'), ('\u0326',  '\u021a'), ('\u0327',
+         '\u0162'), ('\u032d',  '\u1e70'), ('\u0331',  '\u1e6e')]), ('\x55',  &[('\u0300',  '\xd9'),
+        ('\u0301',  '\xda'), ('\u0302',  '\xdb'), ('\u0303',  '\u0168'), ('\u0304',  '\u016a'),
+        ('\u0306',  '\u016c'), ('\u0308',  '\xdc'), ('\u0309',  '\u1ee6'), ('\u030a',  '\u016e'),
+        ('\u030b',  '\u0170'), ('\u030c',  '\u01d3'), ('\u030f',  '\u0214'), ('\u0311',  '\u0216'),
+        ('\u031b',  '\u01af'), ('\u0323',  '\u1ee4'), ('\u0324',  '\u1e72'), ('\u0328',  '\u0172'),
+        ('\u032d',  '\u1e76'), ('\u0330',  '\u1e74')]), ('\x56',  &[('\u0303',  '\u1e7c'),
+        ('\u0323',  '\u1e7e')]), ('\x57',  &[('\u0300',  '\u1e80'), ('\u0301',  '\u1e82'),
+        ('\u0302',  '\u0174'), ('\u0307',  '\u1e86'), ('\u0308',  '\u1e84'), ('\u0323',
+         '\u1e88')]), ('\x58',  &[('\u0307',  '\u1e8a'), ('\u0308',  '\u1e8c')]), ('\x59',
+         &[('\u0300',  '\u1ef2'), ('\u0301',  '\xdd'), ('\u0302',  '\u0176'), ('\u0303',  '\u1ef8'),
+        ('\u0304',  '\u0232'), ('\u0307',  '\u1e8e'), ('\u0308',  '\u0178'), ('\u0309',  '\u1ef6'),
+        ('\u0323',  '\u1ef4')]), ('\x5a',  &[('\u0301',  '\u0179'), ('\u0302',  '\u1e90'),
+        ('\u0307',  '\u017b'), ('\u030c',  '\u017d'), ('\u0323',  '\u1e92'), ('\u0331',
+         '\u1e94')]), ('\x61',  &[('\u0300',  '\xe0'), ('\u0301',  '\xe1'), ('\u0302',  '\xe2'),
+        ('\u0303',  '\xe3'), ('\u0304',  '\u0101'), ('\u0306',  '\u0103'), ('\u0307',  '\u0227'),
+        ('\u0308',  '\xe4'), ('\u0309',  '\u1ea3'), ('\u030a',  '\xe5'), ('\u030c',  '\u01ce'),
+        ('\u030f',  '\u0201'), ('\u0311',  '\u0203'), ('\u0323',  '\u1ea1'), ('\u0325',  '\u1e01'),
+        ('\u0328',  '\u0105')]), ('\x62',  &[('\u0307',  '\u1e03'), ('\u0323',  '\u1e05'),
+        ('\u0331',  '\u1e07')]), ('\x63',  &[('\u0301',  '\u0107'), ('\u0302',  '\u0109'),
+        ('\u0307',  '\u010b'), ('\u030c',  '\u010d'), ('\u0327',  '\xe7')]), ('\x64',  &[('\u0307',
+         '\u1e0b'), ('\u030c',  '\u010f'), ('\u0323',  '\u1e0d'), ('\u0327',  '\u1e11'), ('\u032d',
+         '\u1e13'), ('\u0331',  '\u1e0f')]), ('\x65',  &[('\u0300',  '\xe8'), ('\u0301',  '\xe9'),
+        ('\u0302',  '\xea'), ('\u0303',  '\u1ebd'), ('\u0304',  '\u0113'), ('\u0306',  '\u0115'),
+        ('\u0307',  '\u0117'), ('\u0308',  '\xeb'), ('\u0309',  '\u1ebb'), ('\u030c',  '\u011b'),
+        ('\u030f',  '\u0205'), ('\u0311',  '\u0207'), ('\u0323',  '\u1eb9'), ('\u0327',  '\u0229'),
+        ('\u0328',  '\u0119'), ('\u032d',  '\u1e19'), ('\u0330',  '\u1e1b')]), ('\x66',
+         &[('\u0307',  '\u1e1f')]), ('\x67',  &[('\u0301',  '\u01f5'), ('\u0302',  '\u011d'),
+        ('\u0304',  '\u1e21'), ('\u0306',  '\u011f'), ('\u0307',  '\u0121'), ('\u030c',  '\u01e7'),
+        ('\u0327',  '\u0123')]), ('\x68',  &[('\u0302',  '\u0125'), ('\u0307',  '\u1e23'),
+        ('\u0308',  '\u1e27'), ('\u030c',  '\u021f'), ('\u0323',  '\u1e25'), ('\u0327',  '\u1e29'),
+        ('\u032e',  '\u1e2b'), ('\u0331',  '\u1e96')]), ('\x69',  &[('\u0300',  '\xec'), ('\u0301',
+         '\xed'), ('\u0302',  '\xee'), ('\u0303',  '\u0129'), ('\u0304',  '\u012b'), ('\u0306',
+         '\u012d'), ('\u0308',  '\xef'), ('\u0309',  '\u1ec9'), ('\u030c',  '\u01d0'), ('\u030f',
+         '\u0209'), ('\u0311',  '\u020b'), ('\u0323',  '\u1ecb'), ('\u0328',  '\u012f'), ('\u0330',
+         '\u1e2d')]), ('\x6a',  &[('\u0302',  '\u0135'), ('\u030c',  '\u01f0')]), ('\x6b',
+         &[('\u0301',  '\u1e31'), ('\u030c',  '\u01e9'), ('\u0323',  '\u1e33'), ('\u0327',
+         '\u0137'), ('\u0331',  '\u1e35')]), ('\x6c',  &[('\u0301',  '\u013a'), ('\u030c',
+         '\u013e'), ('\u0323',  '\u1e37'), ('\u0327',  '\u013c'), ('\u032d',  '\u1e3d'), ('\u0331',
+         '\u1e3b')]), ('\x6d',  &[('\u0301',  '\u1e3f'), ('\u0307',  '\u1e41'), ('\u0323',
+         '\u1e43')]), ('\x6e',  &[('\u0300',  '\u01f9'), ('\u0301',  '\u0144'), ('\u0303',  '\xf1'),
+        ('\u0307',  '\u1e45'), ('\u030c',  '\u0148'), ('\u0323',  '\u1e47'), ('\u0327',  '\u0146'),
+        ('\u032d',  '\u1e4b'), ('\u0331',  '\u1e49')]), ('\x6f',  &[('\u0300',  '\xf2'), ('\u0301',
+         '\xf3'), ('\u0302',  '\xf4'), ('\u0303',  '\xf5'), ('\u0304',  '\u014d'), ('\u0306',
+         '\u014f'), ('\u0307',  '\u022f'), ('\u0308',  '\xf6'), ('\u0309',  '\u1ecf'), ('\u030b',
+         '\u0151'), ('\u030c',  '\u01d2'), ('\u030f',  '\u020d'), ('\u0311',  '\u020f'), ('\u031b',
+         '\u01a1'), ('\u0323',  '\u1ecd'), ('\u0328',  '\u01eb')]), ('\x70',  &[('\u0301',
+         '\u1e55'), ('\u0307',  '\u1e57')]), ('\x72',  &[('\u0301',  '\u0155'), ('\u0307',
+         '\u1e59'), ('\u030c',  '\u0159'), ('\u030f',  '\u0211'), ('\u0311',  '\u0213'), ('\u0323',
+         '\u1e5b'), ('\u0327',  '\u0157'), ('\u0331',  '\u1e5f')]), ('\x73',  &[('\u0301',
+         '\u015b'), ('\u0302',  '\u015d'), ('\u0307',  '\u1e61'), ('\u030c',  '\u0161'), ('\u0323',
+         '\u1e63'), ('\u0326',  '\u0219'), ('\u0327',  '\u015f')]), ('\x74',  &[('\u0307',
+         '\u1e6b'), ('\u0308',  '\u1e97'), ('\u030c',  '\u0165'), ('\u0323',  '\u1e6d'), ('\u0326',
+         '\u021b'), ('\u0327',  '\u0163'), ('\u032d',  '\u1e71'), ('\u0331',  '\u1e6f')]), ('\x75',
+         &[('\u0300',  '\xf9'), ('\u0301',  '\xfa'), ('\u0302',  '\xfb'), ('\u0303',  '\u0169'),
+        ('\u0304',  '\u016b'), ('\u0306',  '\u016d'), ('\u0308',  '\xfc'), ('\u0309',  '\u1ee7'),
+        ('\u030a',  '\u016f'), ('\u030b',  '\u0171'), ('\u030c',  '\u01d4'), ('\u030f',  '\u0215'),
+        ('\u0311',  '\u0217'), ('\u031b',  '\u01b0'), ('\u0323',  '\u1ee5'), ('\u0324',  '\u1e73'),
+        ('\u0328',  '\u0173'), ('\u032d',  '\u1e77'), ('\u0330',  '\u1e75')]), ('\x76',
+         &[('\u0303',  '\u1e7d'), ('\u0323',  '\u1e7f')]), ('\x77',  &[('\u0300',  '\u1e81'),
+        ('\u0301',  '\u1e83'), ('\u0302',  '\u0175'), ('\u0307',  '\u1e87'), ('\u0308',  '\u1e85'),
+        ('\u030a',  '\u1e98'), ('\u0323',  '\u1e89')]), ('\x78',  &[('\u0307',  '\u1e8b'),
+        ('\u0308',  '\u1e8d')]), ('\x79',  &[('\u0300',  '\u1ef3'), ('\u0301',  '\xfd'), ('\u0302',
+         '\u0177'), ('\u0303',  '\u1ef9'), ('\u0304',  '\u0233'), ('\u0307',  '\u1e8f'), ('\u0308',
+         '\xff'), ('\u0309',  '\u1ef7'), ('\u030a',  '\u1e99'), ('\u0323',  '\u1ef5')]), ('\x7a',
+         &[('\u0301',  '\u017a'), ('\u0302',  '\u1e91'), ('\u0307',  '\u017c'), ('\u030c',
+         '\u017e'), ('\u0323',  '\u1e93'), ('\u0331',  '\u1e95')]), ('\xa8',  &[('\u0300',
+         '\u1fed'), ('\u0301',  '\u0385'), ('\u0342',  '\u1fc1')]), ('\xc2',  &[('\u0300',
+         '\u1ea6'), ('\u0301',  '\u1ea4'), ('\u0303',  '\u1eaa'), ('\u0309',  '\u1ea8')]), ('\xc4',
+         &[('\u0304',  '\u01de')]), ('\xc5',  &[('\u0301',  '\u01fa')]), ('\xc6',  &[('\u0301',
+         '\u01fc'), ('\u0304',  '\u01e2')]), ('\xc7',  &[('\u0301',  '\u1e08')]), ('\xca',
+         &[('\u0300',  '\u1ec0'), ('\u0301',  '\u1ebe'), ('\u0303',  '\u1ec4'), ('\u0309',
+         '\u1ec2')]), ('\xcf',  &[('\u0301',  '\u1e2e')]), ('\xd4',  &[('\u0300',  '\u1ed2'),
+        ('\u0301',  '\u1ed0'), ('\u0303',  '\u1ed6'), ('\u0309',  '\u1ed4')]), ('\xd5',
+         &[('\u0301',  '\u1e4c'), ('\u0304',  '\u022c'), ('\u0308',  '\u1e4e')]), ('\xd6',
+         &[('\u0304',  '\u022a')]), ('\xd8',  &[('\u0301',  '\u01fe')]), ('\xdc',  &[('\u0300',
+         '\u01db'), ('\u0301',  '\u01d7'), ('\u0304',  '\u01d5'), ('\u030c',  '\u01d9')]), ('\xe2',
+         &[('\u0300',  '\u1ea7'), ('\u0301',  '\u1ea5'), ('\u0303',  '\u1eab'), ('\u0309',
+         '\u1ea9')]), ('\xe4',  &[('\u0304',  '\u01df')]), ('\xe5',  &[('\u0301',  '\u01fb')]),
+        ('\xe6',  &[('\u0301',  '\u01fd'), ('\u0304',  '\u01e3')]), ('\xe7',  &[('\u0301',
+         '\u1e09')]), ('\xea',  &[('\u0300',  '\u1ec1'), ('\u0301',  '\u1ebf'), ('\u0303',
+         '\u1ec5'), ('\u0309',  '\u1ec3')]), ('\xef',  &[('\u0301',  '\u1e2f')]), ('\xf4',
+         &[('\u0300',  '\u1ed3'), ('\u0301',  '\u1ed1'), ('\u0303',  '\u1ed7'), ('\u0309',
+         '\u1ed5')]), ('\xf5',  &[('\u0301',  '\u1e4d'), ('\u0304',  '\u022d'), ('\u0308',
+         '\u1e4f')]), ('\xf6',  &[('\u0304',  '\u022b')]), ('\xf8',  &[('\u0301',  '\u01ff')]),
+        ('\xfc',  &[('\u0300',  '\u01dc'), ('\u0301',  '\u01d8'), ('\u0304',  '\u01d6'), ('\u030c',
+         '\u01da')]), ('\u0102',  &[('\u0300',  '\u1eb0'), ('\u0301',  '\u1eae'), ('\u0303',
+         '\u1eb4'), ('\u0309',  '\u1eb2')]), ('\u0103',  &[('\u0300',  '\u1eb1'), ('\u0301',
+         '\u1eaf'), ('\u0303',  '\u1eb5'), ('\u0309',  '\u1eb3')]), ('\u0112',  &[('\u0300',
+         '\u1e14'), ('\u0301',  '\u1e16')]), ('\u0113',  &[('\u0300',  '\u1e15'), ('\u0301',
+         '\u1e17')]), ('\u014c',  &[('\u0300',  '\u1e50'), ('\u0301',  '\u1e52')]), ('\u014d',
+         &[('\u0300',  '\u1e51'), ('\u0301',  '\u1e53')]), ('\u015a',  &[('\u0307',  '\u1e64')]),
+        ('\u015b',  &[('\u0307',  '\u1e65')]), ('\u0160',  &[('\u0307',  '\u1e66')]), ('\u0161',
+         &[('\u0307',  '\u1e67')]), ('\u0168',  &[('\u0301',  '\u1e78')]), ('\u0169',  &[('\u0301',
+         '\u1e79')]), ('\u016a',  &[('\u0308',  '\u1e7a')]), ('\u016b',  &[('\u0308',  '\u1e7b')]),
+        ('\u017f',  &[('\u0307',  '\u1e9b')]), ('\u01a0',  &[('\u0300',  '\u1edc'), ('\u0301',
+         '\u1eda'), ('\u0303',  '\u1ee0'), ('\u0309',  '\u1ede'), ('\u0323',  '\u1ee2')]),
+        ('\u01a1',  &[('\u0300',  '\u1edd'), ('\u0301',  '\u1edb'), ('\u0303',  '\u1ee1'),
+        ('\u0309',  '\u1edf'), ('\u0323',  '\u1ee3')]), ('\u01af',  &[('\u0300',  '\u1eea'),
+        ('\u0301',  '\u1ee8'), ('\u0303',  '\u1eee'), ('\u0309',  '\u1eec'), ('\u0323',
+         '\u1ef0')]), ('\u01b0',  &[('\u0300',  '\u1eeb'), ('\u0301',  '\u1ee9'), ('\u0303',
+         '\u1eef'), ('\u0309',  '\u1eed'), ('\u0323',  '\u1ef1')]), ('\u01b7',  &[('\u030c',
+         '\u01ee')]), ('\u01ea',  &[('\u0304',  '\u01ec')]), ('\u01eb',  &[('\u0304',  '\u01ed')]),
+        ('\u0226',  &[('\u0304',  '\u01e0')]), ('\u0227',  &[('\u0304',  '\u01e1')]), ('\u0228',
+         &[('\u0306',  '\u1e1c')]), ('\u0229',  &[('\u0306',  '\u1e1d')]), ('\u022e',  &[('\u0304',
+         '\u0230')]), ('\u022f',  &[('\u0304',  '\u0231')]), ('\u0292',  &[('\u030c',  '\u01ef')]),
+        ('\u0391',  &[('\u0300',  '\u1fba'), ('\u0301',  '\u0386'), ('\u0304',  '\u1fb9'),
+        ('\u0306',  '\u1fb8'), ('\u0313',  '\u1f08'), ('\u0314',  '\u1f09'), ('\u0345',
+         '\u1fbc')]), ('\u0395',  &[('\u0300',  '\u1fc8'), ('\u0301',  '\u0388'), ('\u0313',
+         '\u1f18'), ('\u0314',  '\u1f19')]), ('\u0397',  &[('\u0300',  '\u1fca'), ('\u0301',
+         '\u0389'), ('\u0313',  '\u1f28'), ('\u0314',  '\u1f29'), ('\u0345',  '\u1fcc')]),
+        ('\u0399',  &[('\u0300',  '\u1fda'), ('\u0301',  '\u038a'), ('\u0304',  '\u1fd9'),
+        ('\u0306',  '\u1fd8'), ('\u0308',  '\u03aa'), ('\u0313',  '\u1f38'), ('\u0314',
+         '\u1f39')]), ('\u039f',  &[('\u0300',  '\u1ff8'), ('\u0301',  '\u038c'), ('\u0313',
+         '\u1f48'), ('\u0314',  '\u1f49')]), ('\u03a1',  &[('\u0314',  '\u1fec')]), ('\u03a5',
+         &[('\u0300',  '\u1fea'), ('\u0301',  '\u038e'), ('\u0304',  '\u1fe9'), ('\u0306',
+         '\u1fe8'), ('\u0308',  '\u03ab'), ('\u0314',  '\u1f59')]), ('\u03a9',  &[('\u0300',
+         '\u1ffa'), ('\u0301',  '\u038f'), ('\u0313',  '\u1f68'), ('\u0314',  '\u1f69'), ('\u0345',
+         '\u1ffc')]), ('\u03ac',  &[('\u0345',  '\u1fb4')]), ('\u03ae',  &[('\u0345',  '\u1fc4')]),
+        ('\u03b1',  &[('\u0300',  '\u1f70'), ('\u0301',  '\u03ac'), ('\u0304',  '\u1fb1'),
+        ('\u0306',  '\u1fb0'), ('\u0313',  '\u1f00'), ('\u0314',  '\u1f01'), ('\u0342',  '\u1fb6'),
+        ('\u0345',  '\u1fb3')]), ('\u03b5',  &[('\u0300',  '\u1f72'), ('\u0301',  '\u03ad'),
+        ('\u0313',  '\u1f10'), ('\u0314',  '\u1f11')]), ('\u03b7',  &[('\u0300',  '\u1f74'),
+        ('\u0301',  '\u03ae'), ('\u0313',  '\u1f20'), ('\u0314',  '\u1f21'), ('\u0342',  '\u1fc6'),
+        ('\u0345',  '\u1fc3')]), ('\u03b9',  &[('\u0300',  '\u1f76'), ('\u0301',  '\u03af'),
+        ('\u0304',  '\u1fd1'), ('\u0306',  '\u1fd0'), ('\u0308',  '\u03ca'), ('\u0313',  '\u1f30'),
+        ('\u0314',  '\u1f31'), ('\u0342',  '\u1fd6')]), ('\u03bf',  &[('\u0300',  '\u1f78'),
+        ('\u0301',  '\u03cc'), ('\u0313',  '\u1f40'), ('\u0314',  '\u1f41')]), ('\u03c1',
+         &[('\u0313',  '\u1fe4'), ('\u0314',  '\u1fe5')]), ('\u03c5',  &[('\u0300',  '\u1f7a'),
+        ('\u0301',  '\u03cd'), ('\u0304',  '\u1fe1'), ('\u0306',  '\u1fe0'), ('\u0308',  '\u03cb'),
+        ('\u0313',  '\u1f50'), ('\u0314',  '\u1f51'), ('\u0342',  '\u1fe6')]), ('\u03c9',
+         &[('\u0300',  '\u1f7c'), ('\u0301',  '\u03ce'), ('\u0313',  '\u1f60'), ('\u0314',
+         '\u1f61'), ('\u0342',  '\u1ff6'), ('\u0345',  '\u1ff3')]), ('\u03ca',  &[('\u0300',
+         '\u1fd2'), ('\u0301',  '\u0390'), ('\u0342',  '\u1fd7')]), ('\u03cb',  &[('\u0300',
+         '\u1fe2'), ('\u0301',  '\u03b0'), ('\u0342',  '\u1fe7')]), ('\u03ce',  &[('\u0345',
+         '\u1ff4')]), ('\u03d2',  &[('\u0301',  '\u03d3'), ('\u0308',  '\u03d4')]), ('\u0406',
+         &[('\u0308',  '\u0407')]), ('\u0410',  &[('\u0306',  '\u04d0'), ('\u0308',  '\u04d2')]),
+        ('\u0413',  &[('\u0301',  '\u0403')]), ('\u0415',  &[('\u0300',  '\u0400'), ('\u0306',
+         '\u04d6'), ('\u0308',  '\u0401')]), ('\u0416',  &[('\u0306',  '\u04c1'), ('\u0308',
+         '\u04dc')]), ('\u0417',  &[('\u0308',  '\u04de')]), ('\u0418',  &[('\u0300',  '\u040d'),
+        ('\u0304',  '\u04e2'), ('\u0306',  '\u0419'), ('\u0308',  '\u04e4')]), ('\u041a',
+         &[('\u0301',  '\u040c')]), ('\u041e',  &[('\u0308',  '\u04e6')]), ('\u0423',  &[('\u0304',
+         '\u04ee'), ('\u0306',  '\u040e'), ('\u0308',  '\u04f0'), ('\u030b',  '\u04f2')]),
+        ('\u0427',  &[('\u0308',  '\u04f4')]), ('\u042b',  &[('\u0308',  '\u04f8')]), ('\u042d',
+         &[('\u0308',  '\u04ec')]), ('\u0430',  &[('\u0306',  '\u04d1'), ('\u0308',  '\u04d3')]),
+        ('\u0433',  &[('\u0301',  '\u0453')]), ('\u0435',  &[('\u0300',  '\u0450'), ('\u0306',
+         '\u04d7'), ('\u0308',  '\u0451')]), ('\u0436',  &[('\u0306',  '\u04c2'), ('\u0308',
+         '\u04dd')]), ('\u0437',  &[('\u0308',  '\u04df')]), ('\u0438',  &[('\u0300',  '\u045d'),
+        ('\u0304',  '\u04e3'), ('\u0306',  '\u0439'), ('\u0308',  '\u04e5')]), ('\u043a',
+         &[('\u0301',  '\u045c')]), ('\u043e',  &[('\u0308',  '\u04e7')]), ('\u0443',  &[('\u0304',
+         '\u04ef'), ('\u0306',  '\u045e'), ('\u0308',  '\u04f1'), ('\u030b',  '\u04f3')]),
+        ('\u0447',  &[('\u0308',  '\u04f5')]), ('\u044b',  &[('\u0308',  '\u04f9')]), ('\u044d',
+         &[('\u0308',  '\u04ed')]), ('\u0456',  &[('\u0308',  '\u0457')]), ('\u0474',  &[('\u030f',
+         '\u0476')]), ('\u0475',  &[('\u030f',  '\u0477')]), ('\u04d8',  &[('\u0308',  '\u04da')]),
+        ('\u04d9',  &[('\u0308',  '\u04db')]), ('\u04e8',  &[('\u0308',  '\u04ea')]), ('\u04e9',
+         &[('\u0308',  '\u04eb')]), ('\u0627',  &[('\u0653',  '\u0622'), ('\u0654',  '\u0623'),
+        ('\u0655',  '\u0625')]), ('\u0648',  &[('\u0654',  '\u0624')]), ('\u064a',  &[('\u0654',
+         '\u0626')]), ('\u06c1',  &[('\u0654',  '\u06c2')]), ('\u06d2',  &[('\u0654',  '\u06d3')]),
+        ('\u06d5',  &[('\u0654',  '\u06c0')]), ('\u0928',  &[('\u093c',  '\u0929')]), ('\u0930',
+         &[('\u093c',  '\u0931')]), ('\u0933',  &[('\u093c',  '\u0934')]), ('\u09c7',  &[('\u09be',
+         '\u09cb'), ('\u09d7',  '\u09cc')]), ('\u0b47',  &[('\u0b3e',  '\u0b4b'), ('\u0b56',
+         '\u0b48'), ('\u0b57',  '\u0b4c')]), ('\u0b92',  &[('\u0bd7',  '\u0b94')]), ('\u0bc6',
+         &[('\u0bbe',  '\u0bca'), ('\u0bd7',  '\u0bcc')]), ('\u0bc7',  &[('\u0bbe',  '\u0bcb')]),
+        ('\u0c46',  &[('\u0c56',  '\u0c48')]), ('\u0cbf',  &[('\u0cd5',  '\u0cc0')]), ('\u0cc6',
+         &[('\u0cc2',  '\u0cca'), ('\u0cd5',  '\u0cc7'), ('\u0cd6',  '\u0cc8')]), ('\u0cca',
+         &[('\u0cd5',  '\u0ccb')]), ('\u0d46',  &[('\u0d3e',  '\u0d4a'), ('\u0d57',  '\u0d4c')]),
+        ('\u0d47',  &[('\u0d3e',  '\u0d4b')]), ('\u0dd9',  &[('\u0dca',  '\u0dda'), ('\u0dcf',
+         '\u0ddc'), ('\u0ddf',  '\u0dde')]), ('\u0ddc',  &[('\u0dca',  '\u0ddd')]), ('\u1025',
+         &[('\u102e',  '\u1026')]), ('\u1b05',  &[('\u1b35',  '\u1b06')]), ('\u1b07',  &[('\u1b35',
+         '\u1b08')]), ('\u1b09',  &[('\u1b35',  '\u1b0a')]), ('\u1b0b',  &[('\u1b35',  '\u1b0c')]),
+        ('\u1b0d',  &[('\u1b35',  '\u1b0e')]), ('\u1b11',  &[('\u1b35',  '\u1b12')]), ('\u1b3a',
+         &[('\u1b35',  '\u1b3b')]), ('\u1b3c',  &[('\u1b35',  '\u1b3d')]), ('\u1b3e',  &[('\u1b35',
+         '\u1b40')]), ('\u1b3f',  &[('\u1b35',  '\u1b41')]), ('\u1b42',  &[('\u1b35',  '\u1b43')]),
+        ('\u1e36',  &[('\u0304',  '\u1e38')]), ('\u1e37',  &[('\u0304',  '\u1e39')]), ('\u1e5a',
+         &[('\u0304',  '\u1e5c')]), ('\u1e5b',  &[('\u0304',  '\u1e5d')]), ('\u1e62',  &[('\u0307',
+         '\u1e68')]), ('\u1e63',  &[('\u0307',  '\u1e69')]), ('\u1ea0',  &[('\u0302',  '\u1eac'),
+        ('\u0306',  '\u1eb6')]), ('\u1ea1',  &[('\u0302',  '\u1ead'), ('\u0306',  '\u1eb7')]),
+        ('\u1eb8',  &[('\u0302',  '\u1ec6')]), ('\u1eb9',  &[('\u0302',  '\u1ec7')]), ('\u1ecc',
+         &[('\u0302',  '\u1ed8')]), ('\u1ecd',  &[('\u0302',  '\u1ed9')]), ('\u1f00',  &[('\u0300',
+         '\u1f02'), ('\u0301',  '\u1f04'), ('\u0342',  '\u1f06'), ('\u0345',  '\u1f80')]),
+        ('\u1f01',  &[('\u0300',  '\u1f03'), ('\u0301',  '\u1f05'), ('\u0342',  '\u1f07'),
+        ('\u0345',  '\u1f81')]), ('\u1f02',  &[('\u0345',  '\u1f82')]), ('\u1f03',  &[('\u0345',
+         '\u1f83')]), ('\u1f04',  &[('\u0345',  '\u1f84')]), ('\u1f05',  &[('\u0345',  '\u1f85')]),
+        ('\u1f06',  &[('\u0345',  '\u1f86')]), ('\u1f07',  &[('\u0345',  '\u1f87')]), ('\u1f08',
+         &[('\u0300',  '\u1f0a'), ('\u0301',  '\u1f0c'), ('\u0342',  '\u1f0e'), ('\u0345',
+         '\u1f88')]), ('\u1f09',  &[('\u0300',  '\u1f0b'), ('\u0301',  '\u1f0d'), ('\u0342',
+         '\u1f0f'), ('\u0345',  '\u1f89')]), ('\u1f0a',  &[('\u0345',  '\u1f8a')]), ('\u1f0b',
+         &[('\u0345',  '\u1f8b')]), ('\u1f0c',  &[('\u0345',  '\u1f8c')]), ('\u1f0d',  &[('\u0345',
+         '\u1f8d')]), ('\u1f0e',  &[('\u0345',  '\u1f8e')]), ('\u1f0f',  &[('\u0345',  '\u1f8f')]),
+        ('\u1f10',  &[('\u0300',  '\u1f12'), ('\u0301',  '\u1f14')]), ('\u1f11',  &[('\u0300',
+         '\u1f13'), ('\u0301',  '\u1f15')]), ('\u1f18',  &[('\u0300',  '\u1f1a'), ('\u0301',
+         '\u1f1c')]), ('\u1f19',  &[('\u0300',  '\u1f1b'), ('\u0301',  '\u1f1d')]), ('\u1f20',
+         &[('\u0300',  '\u1f22'), ('\u0301',  '\u1f24'), ('\u0342',  '\u1f26'), ('\u0345',
+         '\u1f90')]), ('\u1f21',  &[('\u0300',  '\u1f23'), ('\u0301',  '\u1f25'), ('\u0342',
+         '\u1f27'), ('\u0345',  '\u1f91')]), ('\u1f22',  &[('\u0345',  '\u1f92')]), ('\u1f23',
+         &[('\u0345',  '\u1f93')]), ('\u1f24',  &[('\u0345',  '\u1f94')]), ('\u1f25',  &[('\u0345',
+         '\u1f95')]), ('\u1f26',  &[('\u0345',  '\u1f96')]), ('\u1f27',  &[('\u0345',  '\u1f97')]),
+        ('\u1f28',  &[('\u0300',  '\u1f2a'), ('\u0301',  '\u1f2c'), ('\u0342',  '\u1f2e'),
+        ('\u0345',  '\u1f98')]), ('\u1f29',  &[('\u0300',  '\u1f2b'), ('\u0301',  '\u1f2d'),
+        ('\u0342',  '\u1f2f'), ('\u0345',  '\u1f99')]), ('\u1f2a',  &[('\u0345',  '\u1f9a')]),
+        ('\u1f2b',  &[('\u0345',  '\u1f9b')]), ('\u1f2c',  &[('\u0345',  '\u1f9c')]), ('\u1f2d',
+         &[('\u0345',  '\u1f9d')]), ('\u1f2e',  &[('\u0345',  '\u1f9e')]), ('\u1f2f',  &[('\u0345',
+         '\u1f9f')]), ('\u1f30',  &[('\u0300',  '\u1f32'), ('\u0301',  '\u1f34'), ('\u0342',
+         '\u1f36')]), ('\u1f31',  &[('\u0300',  '\u1f33'), ('\u0301',  '\u1f35'), ('\u0342',
+         '\u1f37')]), ('\u1f38',  &[('\u0300',  '\u1f3a'), ('\u0301',  '\u1f3c'), ('\u0342',
+         '\u1f3e')]), ('\u1f39',  &[('\u0300',  '\u1f3b'), ('\u0301',  '\u1f3d'), ('\u0342',
+         '\u1f3f')]), ('\u1f40',  &[('\u0300',  '\u1f42'), ('\u0301',  '\u1f44')]), ('\u1f41',
+         &[('\u0300',  '\u1f43'), ('\u0301',  '\u1f45')]), ('\u1f48',  &[('\u0300',  '\u1f4a'),
+        ('\u0301',  '\u1f4c')]), ('\u1f49',  &[('\u0300',  '\u1f4b'), ('\u0301',  '\u1f4d')]),
+        ('\u1f50',  &[('\u0300',  '\u1f52'), ('\u0301',  '\u1f54'), ('\u0342',  '\u1f56')]),
+        ('\u1f51',  &[('\u0300',  '\u1f53'), ('\u0301',  '\u1f55'), ('\u0342',  '\u1f57')]),
+        ('\u1f59',  &[('\u0300',  '\u1f5b'), ('\u0301',  '\u1f5d'), ('\u0342',  '\u1f5f')]),
+        ('\u1f60',  &[('\u0300',  '\u1f62'), ('\u0301',  '\u1f64'), ('\u0342',  '\u1f66'),
+        ('\u0345',  '\u1fa0')]), ('\u1f61',  &[('\u0300',  '\u1f63'), ('\u0301',  '\u1f65'),
+        ('\u0342',  '\u1f67'), ('\u0345',  '\u1fa1')]), ('\u1f62',  &[('\u0345',  '\u1fa2')]),
+        ('\u1f63',  &[('\u0345',  '\u1fa3')]), ('\u1f64',  &[('\u0345',  '\u1fa4')]), ('\u1f65',
+         &[('\u0345',  '\u1fa5')]), ('\u1f66',  &[('\u0345',  '\u1fa6')]), ('\u1f67',  &[('\u0345',
+         '\u1fa7')]), ('\u1f68',  &[('\u0300',  '\u1f6a'), ('\u0301',  '\u1f6c'), ('\u0342',
+         '\u1f6e'), ('\u0345',  '\u1fa8')]), ('\u1f69',  &[('\u0300',  '\u1f6b'), ('\u0301',
+         '\u1f6d'), ('\u0342',  '\u1f6f'), ('\u0345',  '\u1fa9')]), ('\u1f6a',  &[('\u0345',
+         '\u1faa')]), ('\u1f6b',  &[('\u0345',  '\u1fab')]), ('\u1f6c',  &[('\u0345',  '\u1fac')]),
+        ('\u1f6d',  &[('\u0345',  '\u1fad')]), ('\u1f6e',  &[('\u0345',  '\u1fae')]), ('\u1f6f',
+         &[('\u0345',  '\u1faf')]), ('\u1f70',  &[('\u0345',  '\u1fb2')]), ('\u1f74',  &[('\u0345',
+         '\u1fc2')]), ('\u1f7c',  &[('\u0345',  '\u1ff2')]), ('\u1fb6',  &[('\u0345',  '\u1fb7')]),
+        ('\u1fbf',  &[('\u0300',  '\u1fcd'), ('\u0301',  '\u1fce'), ('\u0342',  '\u1fcf')]),
+        ('\u1fc6',  &[('\u0345',  '\u1fc7')]), ('\u1ff6',  &[('\u0345',  '\u1ff7')]), ('\u1ffe',
+         &[('\u0300',  '\u1fdd'), ('\u0301',  '\u1fde'), ('\u0342',  '\u1fdf')]), ('\u2190',
+         &[('\u0338',  '\u219a')]), ('\u2192',  &[('\u0338',  '\u219b')]), ('\u2194',  &[('\u0338',
+         '\u21ae')]), ('\u21d0',  &[('\u0338',  '\u21cd')]), ('\u21d2',  &[('\u0338',  '\u21cf')]),
+        ('\u21d4',  &[('\u0338',  '\u21ce')]), ('\u2203',  &[('\u0338',  '\u2204')]), ('\u2208',
+         &[('\u0338',  '\u2209')]), ('\u220b',  &[('\u0338',  '\u220c')]), ('\u2223',  &[('\u0338',
+         '\u2224')]), ('\u2225',  &[('\u0338',  '\u2226')]), ('\u223c',  &[('\u0338',  '\u2241')]),
+        ('\u2243',  &[('\u0338',  '\u2244')]), ('\u2245',  &[('\u0338',  '\u2247')]), ('\u2248',
+         &[('\u0338',  '\u2249')]), ('\u224d',  &[('\u0338',  '\u226d')]), ('\u2261',  &[('\u0338',
+         '\u2262')]), ('\u2264',  &[('\u0338',  '\u2270')]), ('\u2265',  &[('\u0338',  '\u2271')]),
+        ('\u2272',  &[('\u0338',  '\u2274')]), ('\u2273',  &[('\u0338',  '\u2275')]), ('\u2276',
+         &[('\u0338',  '\u2278')]), ('\u2277',  &[('\u0338',  '\u2279')]), ('\u227a',  &[('\u0338',
+         '\u2280')]), ('\u227b',  &[('\u0338',  '\u2281')]), ('\u227c',  &[('\u0338',  '\u22e0')]),
+        ('\u227d',  &[('\u0338',  '\u22e1')]), ('\u2282',  &[('\u0338',  '\u2284')]), ('\u2283',
+         &[('\u0338',  '\u2285')]), ('\u2286',  &[('\u0338',  '\u2288')]), ('\u2287',  &[('\u0338',
+         '\u2289')]), ('\u2291',  &[('\u0338',  '\u22e2')]), ('\u2292',  &[('\u0338',  '\u22e3')]),
+        ('\u22a2',  &[('\u0338',  '\u22ac')]), ('\u22a8',  &[('\u0338',  '\u22ad')]), ('\u22a9',
+         &[('\u0338',  '\u22ae')]), ('\u22ab',  &[('\u0338',  '\u22af')]), ('\u22b2',  &[('\u0338',
+         '\u22ea')]), ('\u22b3',  &[('\u0338',  '\u22eb')]), ('\u22b4',  &[('\u0338',  '\u22ec')]),
+        ('\u22b5',  &[('\u0338',  '\u22ed')]), ('\u3046',  &[('\u3099',  '\u3094')]), ('\u304b',
+         &[('\u3099',  '\u304c')]), ('\u304d',  &[('\u3099',  '\u304e')]), ('\u304f',  &[('\u3099',
+         '\u3050')]), ('\u3051',  &[('\u3099',  '\u3052')]), ('\u3053',  &[('\u3099',  '\u3054')]),
+        ('\u3055',  &[('\u3099',  '\u3056')]), ('\u3057',  &[('\u3099',  '\u3058')]), ('\u3059',
+         &[('\u3099',  '\u305a')]), ('\u305b',  &[('\u3099',  '\u305c')]), ('\u305d',  &[('\u3099',
+         '\u305e')]), ('\u305f',  &[('\u3099',  '\u3060')]), ('\u3061',  &[('\u3099',  '\u3062')]),
+        ('\u3064',  &[('\u3099',  '\u3065')]), ('\u3066',  &[('\u3099',  '\u3067')]), ('\u3068',
+         &[('\u3099',  '\u3069')]), ('\u306f',  &[('\u3099',  '\u3070'), ('\u309a',  '\u3071')]),
+        ('\u3072',  &[('\u3099',  '\u3073'), ('\u309a',  '\u3074')]), ('\u3075',  &[('\u3099',
+         '\u3076'), ('\u309a',  '\u3077')]), ('\u3078',  &[('\u3099',  '\u3079'), ('\u309a',
+         '\u307a')]), ('\u307b',  &[('\u3099',  '\u307c'), ('\u309a',  '\u307d')]), ('\u309d',
+         &[('\u3099',  '\u309e')]), ('\u30a6',  &[('\u3099',  '\u30f4')]), ('\u30ab',  &[('\u3099',
+         '\u30ac')]), ('\u30ad',  &[('\u3099',  '\u30ae')]), ('\u30af',  &[('\u3099',  '\u30b0')]),
+        ('\u30b1',  &[('\u3099',  '\u30b2')]), ('\u30b3',  &[('\u3099',  '\u30b4')]), ('\u30b5',
+         &[('\u3099',  '\u30b6')]), ('\u30b7',  &[('\u3099',  '\u30b8')]), ('\u30b9',  &[('\u3099',
+         '\u30ba')]), ('\u30bb',  &[('\u3099',  '\u30bc')]), ('\u30bd',  &[('\u3099',  '\u30be')]),
+        ('\u30bf',  &[('\u3099',  '\u30c0')]), ('\u30c1',  &[('\u3099',  '\u30c2')]), ('\u30c4',
+         &[('\u3099',  '\u30c5')]), ('\u30c6',  &[('\u3099',  '\u30c7')]), ('\u30c8',  &[('\u3099',
+         '\u30c9')]), ('\u30cf',  &[('\u3099',  '\u30d0'), ('\u309a',  '\u30d1')]), ('\u30d2',
+         &[('\u3099',  '\u30d3'), ('\u309a',  '\u30d4')]), ('\u30d5',  &[('\u3099',  '\u30d6'),
+        ('\u309a',  '\u30d7')]), ('\u30d8',  &[('\u3099',  '\u30d9'), ('\u309a',  '\u30da')]),
+        ('\u30db',  &[('\u3099',  '\u30dc'), ('\u309a',  '\u30dd')]), ('\u30ef',  &[('\u3099',
+         '\u30f7')]), ('\u30f0',  &[('\u3099',  '\u30f8')]), ('\u30f1',  &[('\u3099',  '\u30f9')]),
+        ('\u30f2',  &[('\u3099',  '\u30fa')]), ('\u30fd',  &[('\u3099',  '\u30fe')]), ('\U00011099',
+         &[('\U000110ba',  '\U0001109a')]), ('\U0001109b',  &[('\U000110ba',  '\U0001109c')]),
+        ('\U000110a5',  &[('\U000110ba',  '\U000110ab')]), ('\U00011131',  &[('\U00011127',
+         '\U0001112e')]), ('\U00011132',  &[('\U00011127',  '\U0001112f')])
+    ];
+
     static combining_class_table : &'static [(char, char, u8)] = &[
         ('\u0300', '\u0314', 230), ('\u0315', '\u0315', 232),
         ('\u0316', '\u0319', 220), ('\u031a', '\u031a', 232),
@@ -3629,6 +3942,37 @@ pub mod normalization {
 
     pub fn decompose_compatible(c: char, i: |char|) { d(c, i, true); }
 
+    /// Composes the character pair `(a, b)` into a single character.
+    ///
+    /// Hangul pairs are tried first through `compose_hangul`. Any other pair is looked
+    /// up in `composition_table`, whose entries pair a first character with a list of
+    /// `(second character, composed character)` tuples.
+    ///
+    /// Returns `Some(composed)` on a match and `None` when the pair does not compose.
+    pub fn compose(a: char, b: char) -> Option<char> {
+        use cmp::{Equal, Less, Greater};
+
+        let hangul = compose_hangul(a, b);
+        if hangul.is_some() { return hangul; }
+
+        // NOTE(review): bsearch_table is defined outside this hunk; it is assumed to
+        // return the list stored under `a`, or None if `a` starts no composition.
+        let seconds = match bsearch_table(a, composition_table) {
+            Some(seconds) => seconds,
+            None => return None
+        };
+        // Binary search on the second character, so each list must be sorted by it.
+        let found = seconds.bsearch(|&(second, _)| {
+            if second == b { Equal }
+            else if second < b { Less }
+            else { Greater }
+        });
+        found.map(|pos| {
+            let (_, composed) = seconds[pos];
+            composed
+        })
+    }
+
     pub fn canonical_combining_class(c: char) -> u8 {
         bsearch_range_value_table(c, combining_class_table)
     }
@@ -3686,6 +4021,7 @@ pub mod normalization {
     static S_COUNT: u32 = (L_COUNT * N_COUNT);
 
     // Decompose a precomposed Hangul syllable
+    #[inline(always)]
     fn decompose_hangul(s: char, f: |char|) {
         use cast::transmute;
 
@@ -3704,6 +4040,41 @@ pub mod normalization {
             }
         }
     }
+
+    // Compose a pair of Hangul characters arithmetically.
+    //
+    // Two kinds of pair compose:
+    //   * a leading consonant (LPart) followed by a vowel (VPart) gives an LV syllable;
+    //   * an LV syllable followed by a trailing consonant (TPart) gives an LVT syllable.
+    // Every other pair yields None.
+    //
+    // NOTE(review): most of the *_BASE / *_COUNT statics are defined outside this hunk.
+    // The comments below assume they are the Hangul constants of the Unicode Standard:
+    // N_COUNT == V_COUNT * T_COUNT, and T_BASE lies one below the first trailing consonant.
+    #[inline(always)]
+    fn compose_hangul(a: char, b: char) -> Option<char> {
+        use cast::transmute;
+        let l = a as u32;
+        let v = b as u32;
+        // Compose an LPart and a VPart
+        if L_BASE <= l && l < (L_BASE + L_COUNT) && V_BASE <= v && v < (V_BASE + V_COUNT) {
+            let r = S_BASE + (l - L_BASE) * N_COUNT + (v - V_BASE) * T_COUNT;
+            // Under the assumptions above r is a syllable code point, hence a valid char.
+            unsafe { return Some(transmute(r)); }
+        }
+        // Compose an LVPart and a TPart.
+        // `a` qualifies only when its offset from S_BASE is a multiple of T_COUNT; any
+        // other syllable already ends in a trailing consonant and must not absorb a
+        // second one. `b` must lie strictly above T_BASE: for v == T_BASE the sum below
+        // would equal `a`, silently swallowing `b`.
+        if S_BASE <= l && l < (S_BASE + S_COUNT) && (l - S_BASE) % T_COUNT == 0
+           && T_BASE < v && v < (T_BASE + T_COUNT) {
+            let r = l + (v - T_BASE);
+            // r lies fewer than T_COUNT code points past `a`; assumed to be a valid char.
+            unsafe { return Some(transmute(r)); }
+        }
+        None
+    }
 }
 
 pub mod derived_property {