@@ -144,6 +144,133 @@ enum TextWidthBasis {
144
144
longestLine,
145
145
}
146
146
147
/// A [TextBoundary] subclass for locating word breaks.
///
/// The underlying implementation uses [UAX #29](https://unicode.org/reports/tr29/)
/// defined default word boundaries.
///
/// The default word break rules can be tailored to meet the requirements of
/// different use cases. For instance, the default rule set keeps horizontal
/// whitespaces together as a single word, which may not make sense in a
/// word-counting context -- "hello world" counts as 3 words instead of 2.
/// An example is the [moveByWordBoundary] variant, which is a tailored
/// word-break locator that more closely matches the default behavior of most
/// platforms and editors when it comes to handling text editing keyboard
/// shortcuts that move or delete word by word.
class WordBoundary extends TextBoundary {
  /// Creates a [WordBoundary] with the text and layout information.
  WordBoundary._(this._text, this._paragraph);

  final InlineSpan _text;
  final ui.Paragraph _paragraph;

  // Matches a single space separator or punctuation character. Compiled once
  // and shared, because the predicate that uses it runs for every candidate
  // word break visited during a search.
  static final RegExp _spaceSeparatorOrPunctuation = RegExp(r'[\p{Space_Separator}\p{Punctuation}]', unicode: true);

  @override
  TextRange getTextBoundaryAt(int position) => _paragraph.getWordBoundary(TextPosition(offset: max(position, 0)));

  // Combines two UTF-16 code units (high surrogate + low surrogate) into a
  // single code point that represents a supplementary character.
  static int _codePointFromSurrogates(int highSurrogate, int lowSurrogate) {
    assert(
      TextPainter._isHighSurrogate(highSurrogate),
      'U+${highSurrogate.toRadixString(16).toUpperCase().padLeft(4, "0")} is not a high surrogate.',
    );
    assert(
      TextPainter._isLowSurrogate(lowSurrogate),
      'U+${lowSurrogate.toRadixString(16).toUpperCase().padLeft(4, "0")} is not a low surrogate.',
    );
    const int base = 0x010000 - (0xD800 << 10) - 0xDC00;
    return (highSurrogate << 10) + lowSurrogate + base;
  }

  // Returns the code point that the code unit at `index` belongs to, or null
  // if `index` is outside of the text.
  //
  // The Runes class does not provide random access with a code unit offset.
  //
  // An unpaired surrogate (no neighboring code unit, or a neighbor that is not
  // the matching surrogate half) is returned unchanged instead of being
  // combined, so malformed text does not cause a null-check failure here.
  int? _codePointAt(int index) {
    final int? codeUnitAtIndex = _text.codeUnitAt(index);
    if (codeUnitAtIndex == null) {
      return null;
    }
    switch (codeUnitAtIndex & 0xFC00) {
      case 0xD800:
        final int? following = _text.codeUnitAt(index + 1);
        if (following == null || !TextPainter._isLowSurrogate(following)) {
          return codeUnitAtIndex;
        }
        return _codePointFromSurrogates(codeUnitAtIndex, following);
      case 0xDC00:
        final int? preceding = _text.codeUnitAt(index - 1);
        if (preceding == null || !TextPainter._isHighSurrogate(preceding)) {
          return codeUnitAtIndex;
        }
        return _codePointFromSurrogates(preceding, codeUnitAtIndex);
      default:
        return codeUnitAtIndex;
    }
  }

  // Whether `codePoint` is one of the line-breaking characters that this class
  // treats as a hard word break: U+000A, U+000B, U+000C, U+0085, U+2028 and
  // U+2029.
  static bool _isNewline(int codePoint) {
    switch (codePoint) {
      case 0x000A:
      case 0x0085:
      case 0x000B:
      case 0x000C:
      case 0x2028:
      case 0x2029:
        return true;
      default:
        return false;
    }
  }

  // The predicate behind [moveByWordBoundary].
  //
  // Returns true if the search should stop at the word break at `offset`, and
  // false if that break should be skipped because the character before it (in
  // the search direction) is a space separator or a punctuation.
  bool _skipSpacesAndPunctuations(int offset, bool forward) {
    // Use code point since some punctuations are supplementary characters.
    // "inner" here refers to the code unit that's before the break in the
    // search direction (`forward`).
    final int? innerCodePoint = _codePointAt(forward ? offset - 1 : offset);
    final int? outerCodeUnit = _text.codeUnitAt(forward ? offset : offset - 1);

    // Make sure the hard break rules in UAX#29 take precedence over the ones we
    // add below. Luckily there're only 4 hard break rules for word breaks, and
    // dictionary based breaking does not introduce new hard breaks:
    // https://unicode-org.github.io/icu/userguide/boundaryanalysis/break-rules.html#word-dictionaries
    //
    // WB1 & WB2: always break at the start or the end of the text.
    final bool hardBreakRulesApply = innerCodePoint == null || outerCodeUnit == null
    // WB3a & WB3b: always break before and after newlines.
                                  || _isNewline(innerCodePoint) || _isNewline(outerCodeUnit);
    return hardBreakRulesApply || !_spaceSeparatorOrPunctuation.hasMatch(String.fromCharCode(innerCodePoint));
  }

  /// Returns a [TextBoundary] suitable for handling keyboard navigation
  /// commands that change the current selection word by word.
  ///
  /// This [TextBoundary] is used by text widgets in the flutter framework to
  /// provide default implementation for text editing shortcuts, for example,
  /// "delete to the previous word".
  ///
  /// The implementation applies the same set of rules [WordBoundary] uses,
  /// except that word breaks that end on a space separator or a punctuation
  /// will be skipped, to match the behavior of most platforms. Additional rules
  /// may be added in the future to better match platform behaviors.
  late final TextBoundary moveByWordBoundary = _UntilTextBoundary(this, _skipSpacesAndPunctuations);
}
247
+
248
// A [TextBoundary] that wraps another [TextBoundary] and only reports the
// boundaries of the wrapped instance that `_predicate` accepts.
//
// The predicate is called with a candidate offset and the search direction
// (`true` when searching forward, `false` when searching backward). Rejected
// candidates are skipped and the search continues in the same direction.
class _UntilTextBoundary extends TextBoundary {
  const _UntilTextBoundary(this._textBoundary, this._predicate);

  final UntilPredicate _predicate;
  final TextBoundary _textBoundary;

  // Both searches are written as loops rather than recursion so that the call
  // stack does not grow with the number of consecutive rejected boundaries.

  @override
  int? getLeadingTextBoundaryAt(int position) {
    int searchFrom = position;
    while (searchFrom >= 0) {
      final int? candidate = _textBoundary.getLeadingTextBoundaryAt(searchFrom);
      if (candidate == null || _predicate(candidate, false)) {
        return candidate;
      }
      // Rejected: resume the search strictly before the rejected boundary.
      searchFrom = candidate - 1;
    }
    // Ran past the start of the text without finding an accepted boundary.
    return null;
  }

  @override
  int? getTrailingTextBoundaryAt(int position) {
    int searchFrom = position;
    while (true) {
      final int? candidate = _textBoundary.getTrailingTextBoundaryAt(max(searchFrom, 0));
      if (candidate == null || _predicate(candidate, true)) {
        return candidate;
      }
      // Rejected: resume the search from the rejected boundary itself.
      searchFrom = candidate;
    }
  }
}
273
+
147
274
/// This is used to cache and pass the computed metrics regarding the
148
275
/// caret's size and position. This is preferred due to the expensive
149
276
/// nature of the calculation.
@@ -750,7 +877,7 @@ class TextPainter {
750
877
751
878
// Creates a ui.Paragraph using the current configurations in this class and
752
879
// assign it to _paragraph.
753
- void _createParagraph () {
880
+ ui. Paragraph _createParagraph () {
754
881
assert (_paragraph == null || _rebuildParagraphForPaint);
755
882
final InlineSpan ? text = this .text;
756
883
if (text == null ) {
@@ -763,8 +890,9 @@ class TextPainter {
763
890
_debugMarkNeedsLayoutCallStack = null ;
764
891
return true ;
765
892
}());
766
- _paragraph = builder.build ();
893
+ final ui. Paragraph paragraph = _paragraph = builder.build ();
767
894
_rebuildParagraphForPaint = false ;
895
+ return paragraph;
768
896
}
769
897
770
898
void _layoutParagraph (double minWidth, double maxWidth) {
@@ -861,13 +989,18 @@ class TextPainter {
861
989
canvas.drawParagraph (_paragraph! , offset);
862
990
}
863
991
864
// Whether the given UTF-16 code unit is a high (first) surrogate, i.e. its top
// six bits are 110110 (0xD800-0xDBFF). The value must be a UTF-16 code unit,
// meaning it must be in the range 0x0000-0xFFFF.
//
// See also:
//   * https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF
static bool _isHighSurrogate(int value) => (value & 0xFC00) == 0xD800;
1000
+
1001
// Whether the given UTF-16 code unit is a low (second) surrogate, i.e. its top
// six bits are 110111 (0xDC00-0xDFFF).
static bool _isLowSurrogate(int value) => (value & 0xFC00) == 0xDC00;
872
1005
873
1006
// Checks if the glyph is either [Unicode.RLM] or [Unicode.LRM]. These values take
@@ -886,7 +1019,7 @@ class TextPainter {
886
1019
return null ;
887
1020
}
888
1021
// TODO(goderbauer): doesn't handle extended grapheme clusters with more than one Unicode scalar value (https://github.com/flutter/flutter/issues/13404).
889
- return _isUtf16Surrogate (nextCodeUnit) ? offset + 2 : offset + 1 ;
1022
+ return _isHighSurrogate (nextCodeUnit) ? offset + 2 : offset + 1 ;
890
1023
}
891
1024
892
1025
/// Returns the closest offset before `offset` at which the input cursor can
@@ -897,7 +1030,7 @@ class TextPainter {
897
1030
return null ;
898
1031
}
899
1032
// TODO(goderbauer): doesn't handle extended grapheme clusters with more than one Unicode scalar value (https://github.com/flutter/flutter/issues/13404).
900
- return _isUtf16Surrogate (prevCodeUnit) ? offset - 2 : offset - 1 ;
1033
+ return _isLowSurrogate (prevCodeUnit) ? offset - 2 : offset - 1 ;
901
1034
}
902
1035
903
1036
// Unicode value for a zero width joiner character.
@@ -916,7 +1049,7 @@ class TextPainter {
916
1049
const int NEWLINE_CODE_UNIT = 10 ;
917
1050
918
1051
// Check for multi-code-unit glyphs such as emojis or zero width joiner.
919
- final bool needsSearch = _isUtf16Surrogate (prevCodeUnit) || _text! .codeUnitAt (offset) == _zwjUtf16 || _isUnicodeDirectionality (prevCodeUnit);
1052
+ final bool needsSearch = _isHighSurrogate (prevCodeUnit) || _isLowSurrogate (prevCodeUnit) || _text! .codeUnitAt (offset) == _zwjUtf16 || _isUnicodeDirectionality (prevCodeUnit);
920
1053
int graphemeClusterLength = needsSearch ? 2 : 1 ;
921
1054
List <TextBox > boxes = < TextBox > [];
922
1055
while (boxes.isEmpty) {
@@ -966,7 +1099,7 @@ class TextPainter {
966
1099
final int nextCodeUnit = plainText.codeUnitAt (min (offset, plainTextLength - 1 ));
967
1100
968
1101
// Check for multi-code-unit glyphs such as emojis or zero width joiner
969
- final bool needsSearch = _isUtf16Surrogate (nextCodeUnit) || nextCodeUnit == _zwjUtf16 || _isUnicodeDirectionality (nextCodeUnit);
1102
+ final bool needsSearch = _isHighSurrogate (nextCodeUnit) || _isLowSurrogate (nextCodeUnit) || nextCodeUnit == _zwjUtf16 || _isUnicodeDirectionality (nextCodeUnit);
970
1103
int graphemeClusterLength = needsSearch ? 2 : 1 ;
971
1104
List <TextBox > boxes = < TextBox > [];
972
1105
while (boxes.isEmpty) {
@@ -1141,6 +1274,18 @@ class TextPainter {
1141
1274
return _paragraph! .getWordBoundary (position);
1142
1275
}
1143
1276
1277
/// {@template flutter.painting.TextPainter.wordBoundaries}
/// Returns a [TextBoundary] that can be used to perform word boundary analysis
/// on the current [text].
///
/// This [TextBoundary] uses word boundary rules defined in [Unicode Standard
/// Annex #29](http://www.unicode.org/reports/tr29/#Word_Boundaries).
/// {@endtemplate}
///
/// Word boundary analysis is currently only available once [layout] has been
/// called.
WordBoundary get wordBoundaries {
  final InlineSpan currentText = text!;
  final ui.Paragraph currentParagraph = _paragraph!;
  return WordBoundary._(currentText, currentParagraph);
}
1288
+
1144
1289
/// Returns the text range of the line at the given offset.
1145
1290
///
1146
1291
/// The newline (if any) is not returned as part of the range.
0 commit comments