diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs index 1d32dd4973127..d337c78bee8b5 100644 --- a/src/libsyntax/parse/lexer/unicode_chars.rs +++ b/src/libsyntax/parse/lexer/unicode_chars.rs @@ -16,6 +16,22 @@ use errors::DiagnosticBuilder; use super::StringReader; const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[ + (' ', "No-Break Space", ' '), + (' ', "Ogham Space Mark", ' '), + (' ', "En Quad", ' '), + (' ', "Em Quad", ' '), + (' ', "En Space", ' '), + (' ', "Em Space", ' '), + (' ', "Three-Per-Em Space", ' '), + (' ', "Four-Per-Em Space", ' '), + (' ', "Six-Per-Em Space", ' '), + (' ', "Figure Space", ' '), + (' ', "Punctuation Space", ' '), + (' ', "Thin Space", ' '), + (' ', "Hair Space", ' '), + (' ', "Narrow No-Break Space", ' '), + (' ', "Medium Mathematical Space", ' '), + (' ', "Ideographic Space", ' '), ('ߺ', "Nko Lajanyalan", '_'), ('﹍', "Dashed Low Line", '_'), ('﹎', "Centreline Low Line", '_'), @@ -24,14 +40,18 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[ ('‑', "Non-Breaking Hyphen", '-'), ('‒', "Figure Dash", '-'), ('–', "En Dash", '-'), + ('—', "Em Dash", '-'), ('﹘', "Small Em Dash", '-'), ('⁃', "Hyphen Bullet", '-'), ('˗', "Modifier Letter Minus Sign", '-'), ('−', "Minus Sign", '-'), + ('ー', "Katakana-Hiragana Prolonged Sound Mark", '-'), ('٫', "Arabic Decimal Separator", ','), ('‚', "Single Low-9 Quotation Mark", ','), ('ꓹ', "Lisu Letter Tone Na Po", ','), + (',', "Fullwidth Comma", ','), (';', "Greek Question Mark", ';'), + (';', "Fullwidth Semicolon", ';'), ('ः', "Devanagari Sign Visarga", ':'), ('ઃ', "Gujarati Sign Visarga", ':'), (':', "Fullwidth Colon", ':'), @@ -53,6 +73,7 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[ ('ʔ', "Latin Letter Glottal Stop", '?'), ('ॽ', "Devanagari Letter Glottal Stop", '?'), ('Ꭾ', "Cherokee Letter He", '?'), + ('?', "Fullwidth Question Mark", '?'), ('𝅭', "Musical Symbol Combining Augmentation Dot", '.'), ('․', "One Dot Leader", '.'), ('۔', "Arabic Full Stop", '.'), @@ -60,9 +81,12 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[ ('܂', "Syriac Sublinear Full Stop", '.'), ('꘎', "Vai Full Stop", '.'), ('𐩐', "Kharoshthi Punctuation Dot", '.'), + ('·', "Middle Dot", '.'), ('٠', "Arabic-Indic Digit Zero", '.'), ('۰', "Extended Arabic-Indic Digit Zero", '.'), ('ꓸ', "Lisu Letter Tone Mya Ti", '.'), + ('。', "Ideographic Full Stop", '.'), + ('・', "Katakana Middle Dot", '.'), ('՝', "Armenian Comma", '\''), (''', "Fullwidth Apostrophe", '\''), ('‘', "Left Single Quotation Mark", '\''), @@ -108,16 +132,30 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[ ('ײ', "Hebrew Ligature Yiddish Double Yod", '"'), ('❞', "Heavy Double Comma Quotation Mark Ornament", '"'), ('❝', "Heavy Double Turned Comma Quotation Mark Ornament", '"'), - ('[', "Fullwidth Left Square Bracket", '('), ('❨', "Medium Left Parenthesis Ornament", '('), - ('❲', "Light Left Tortoise Shell Bracket Ornament", '('), - ('〔', "Left Tortoise Shell Bracket", '('), ('﴾', "Ornate Left Parenthesis", '('), - (']', "Fullwidth Right Square Bracket", ')'), + ('(', "Fullwidth Left Parenthesis", '('), ('❩', "Medium Right Parenthesis Ornament", ')'), - ('❳', "Light Right Tortoise Shell Bracket Ornament", ')'), - ('〕', "Right Tortoise Shell Bracket", ')'), ('﴿', "Ornate Right Parenthesis", ')'), + (')', "Fullwidth Right Parenthesis", ')'), + ('[', "Fullwidth Left Square Bracket", '['), + ('❲', "Light Left Tortoise Shell Bracket Ornament", '['), + ('「', "Left Corner Bracket", '['), + ('『', "Left White Corner Bracket", '['), + ('【', "Left Black Lenticular Bracket", '['), + ('〔', "Left Tortoise Shell Bracket", '['), + ('〖', "Left White Lenticular Bracket", '['), + ('〘', "Left White Tortoise Shell Bracket", '['), + ('〚', "Left White Square Bracket", '['), + (']', "Fullwidth Right Square Bracket", ']'), + ('❳', "Light Right Tortoise Shell Bracket Ornament", ']'), + ('」', "Right Corner Bracket", ']'), + ('』', "Right White Corner Bracket", ']'), + ('】', "Right Black Lenticular Bracket", ']'), + ('〕', "Right Tortoise Shell Bracket", ']'), + ('〗', "Right White Lenticular Bracket", ']'), + ('〙', "Right White Tortoise Shell Bracket", ']'), + ('〛', "Right White Square Bracket", ']'), ('❴', "Medium Left Curly Bracket Ornament", '{'), ('❵', "Medium Right Curly Bracket Ornament", '}'), ('⁎', "Low Asterisk", '*'), @@ -140,6 +178,8 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[ ('⟍', "Mathematical Falling Diagonal", '\\'), ('⧵', "Reverse Solidus Operator", '\\'), ('⧹', "Big Reverse Solidus", '\\'), + ('、', "Ideographic Comma", '\\'), + ('ヽ', "Katakana Iteration Mark", '\\'), ('㇔', "Cjk Stroke D", '\\'), ('丶', "Cjk Unified Ideograph-4E36", '\\'), ('⼂', "Kangxi Radical Dot", '\\'), @@ -148,15 +188,20 @@ const UNICODE_ARRAY: &'static [(char, &'static str, char)] = &[ ('‹', "Single Left-Pointing Angle Quotation Mark", '<'), ('❮', "Heavy Left-Pointing Angle Quotation Mark Ornament", '<'), ('˂', "Modifier Letter Left Arrowhead", '<'), + ('〈', "Left Angle Bracket", '<'), + ('《', "Left Double Angle Bracket", '<'), ('꓿', "Lisu Punctuation Full Stop", '='), ('›', "Single Right-Pointing Angle Quotation Mark", '>'), ('❯', "Heavy Right-Pointing Angle Quotation Mark Ornament", '>'), ('˃', "Modifier Letter Right Arrowhead", '>'), + ('〉', "Right Angle Bracket", '>'), + ('》', "Right Double Angle Bracket", '>'), ('Ⲻ', "Coptic Capital Letter Dialect-P Ni", '-'), ('Ɂ', "Latin Capital Letter Glottal Stop", '?'), ('Ⳇ', "Coptic Capital Letter Old Coptic Esh", '/'), ]; const ASCII_ARRAY: &'static [(char, &'static str)] = &[ + (' ', "Space"), ('_', "Underscore"), ('-', "Minus/Hyphen"), (',', "Comma"), @@ -169,6 +214,8 @@ const ASCII_ARRAY: &'static [(char, &'static str)] = &[ ('"', "Quotation Mark"), ('(', "Left Parenthesis"), (')', "Right Parenthesis"), + ('[', "Left Square Bracket"), + (']', "Right Square Bracket"), ('{', "Left Curly Brace"), ('}', "Right Curly Brace"), ('*', "Asterisk"),