Skip to content

Commit 8a4eb9e

Browse files
committed
Skip serializing ASCII chars in case LUTs
Since ASCII chars are already handled by a special case in the `to_lower` and `to_upper` functions, there's no need to waste space on them in the case-mapping lookup tables (LUTs).
1 parent 992d154 commit 8a4eb9e

File tree

2 files changed

+11
-40
lines changed

2 files changed

+11
-40
lines changed

library/core/src/unicode/unicode_data.rs

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -606,19 +606,6 @@ pub mod conversions {
606606
table.binary_search_by(|&(key, _)| key.cmp(&c)).ok()
607607
}
608608
static LOWERCASE_TABLE: &[(char, [char; 3])] = &[
609-
('A', ['a', '\u{0}', '\u{0}']), ('B', ['b', '\u{0}', '\u{0}']),
610-
('C', ['c', '\u{0}', '\u{0}']), ('D', ['d', '\u{0}', '\u{0}']),
611-
('E', ['e', '\u{0}', '\u{0}']), ('F', ['f', '\u{0}', '\u{0}']),
612-
('G', ['g', '\u{0}', '\u{0}']), ('H', ['h', '\u{0}', '\u{0}']),
613-
('I', ['i', '\u{0}', '\u{0}']), ('J', ['j', '\u{0}', '\u{0}']),
614-
('K', ['k', '\u{0}', '\u{0}']), ('L', ['l', '\u{0}', '\u{0}']),
615-
('M', ['m', '\u{0}', '\u{0}']), ('N', ['n', '\u{0}', '\u{0}']),
616-
('O', ['o', '\u{0}', '\u{0}']), ('P', ['p', '\u{0}', '\u{0}']),
617-
('Q', ['q', '\u{0}', '\u{0}']), ('R', ['r', '\u{0}', '\u{0}']),
618-
('S', ['s', '\u{0}', '\u{0}']), ('T', ['t', '\u{0}', '\u{0}']),
619-
('U', ['u', '\u{0}', '\u{0}']), ('V', ['v', '\u{0}', '\u{0}']),
620-
('W', ['w', '\u{0}', '\u{0}']), ('X', ['x', '\u{0}', '\u{0}']),
621-
('Y', ['y', '\u{0}', '\u{0}']), ('Z', ['z', '\u{0}', '\u{0}']),
622609
('\u{c0}', ['\u{e0}', '\u{0}', '\u{0}']), ('\u{c1}', ['\u{e1}', '\u{0}', '\u{0}']),
623610
('\u{c2}', ['\u{e2}', '\u{0}', '\u{0}']), ('\u{c3}', ['\u{e3}', '\u{0}', '\u{0}']),
624611
('\u{c4}', ['\u{e4}', '\u{0}', '\u{0}']), ('\u{c5}', ['\u{e5}', '\u{0}', '\u{0}']),
@@ -1456,19 +1443,6 @@ pub mod conversions {
14561443
];
14571444

14581445
static UPPERCASE_TABLE: &[(char, [char; 3])] = &[
1459-
('a', ['A', '\u{0}', '\u{0}']), ('b', ['B', '\u{0}', '\u{0}']),
1460-
('c', ['C', '\u{0}', '\u{0}']), ('d', ['D', '\u{0}', '\u{0}']),
1461-
('e', ['E', '\u{0}', '\u{0}']), ('f', ['F', '\u{0}', '\u{0}']),
1462-
('g', ['G', '\u{0}', '\u{0}']), ('h', ['H', '\u{0}', '\u{0}']),
1463-
('i', ['I', '\u{0}', '\u{0}']), ('j', ['J', '\u{0}', '\u{0}']),
1464-
('k', ['K', '\u{0}', '\u{0}']), ('l', ['L', '\u{0}', '\u{0}']),
1465-
('m', ['M', '\u{0}', '\u{0}']), ('n', ['N', '\u{0}', '\u{0}']),
1466-
('o', ['O', '\u{0}', '\u{0}']), ('p', ['P', '\u{0}', '\u{0}']),
1467-
('q', ['Q', '\u{0}', '\u{0}']), ('r', ['R', '\u{0}', '\u{0}']),
1468-
('s', ['S', '\u{0}', '\u{0}']), ('t', ['T', '\u{0}', '\u{0}']),
1469-
('u', ['U', '\u{0}', '\u{0}']), ('v', ['V', '\u{0}', '\u{0}']),
1470-
('w', ['W', '\u{0}', '\u{0}']), ('x', ['X', '\u{0}', '\u{0}']),
1471-
('y', ['Y', '\u{0}', '\u{0}']), ('z', ['Z', '\u{0}', '\u{0}']),
14721446
('\u{b5}', ['\u{39c}', '\u{0}', '\u{0}']), ('\u{df}', ['S', 'S', '\u{0}']),
14731447
('\u{e0}', ['\u{c0}', '\u{0}', '\u{0}']), ('\u{e1}', ['\u{c1}', '\u{0}', '\u{0}']),
14741448
('\u{e2}', ['\u{c2}', '\u{0}', '\u{0}']), ('\u{e3}', ['\u{c3}', '\u{0}', '\u{0}']),

src/tools/unicode-table-generator/src/case_mapping.rs

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,24 @@
11
use crate::{fmt_list, UnicodeData};
2-
use std::fmt;
2+
use std::{collections::BTreeMap, fmt};
33

44
pub(crate) fn generate_case_mapping(data: &UnicodeData) -> String {
55
let mut file = String::new();
66

77
file.push_str(HEADER.trim_start());
8-
9-
let decl_type = "&[(char, [char; 3])]";
10-
11-
file.push_str(&format!(
12-
"static LOWERCASE_TABLE: {} = &[{}];",
13-
decl_type,
14-
fmt_list(data.to_lower.iter().map(to_mapping))
15-
));
8+
file.push_str(&generate_table("LOWER", &data.to_lower));
169
file.push_str("\n\n");
17-
file.push_str(&format!(
18-
"static UPPERCASE_TABLE: {} = &[{}];",
19-
decl_type,
20-
fmt_list(data.to_upper.iter().map(to_mapping))
21-
));
10+
file.push_str(&generate_table("UPPER", &data.to_upper));
2211
file
2312
}
2413

14+
fn generate_table(case: &str, data: &BTreeMap<u32, (u32, u32, u32)>) -> String {
15+
format!(
16+
"static {}CASE_TABLE: &[(char, [char; 3])] = &[{}];",
17+
case,
18+
fmt_list(data.iter().map(to_mapping).filter(|(k, _)| !k.0.is_ascii()))
19+
)
20+
}
21+
2522
fn to_mapping((key, (a, b, c)): (&u32, &(u32, u32, u32))) -> (CharEscape, [CharEscape; 3]) {
2623
(
2724
CharEscape(std::char::from_u32(*key).unwrap()),

0 commit comments

Comments
 (0)