11
11
//===----------------------------------------------------------------------===//
12
12
13
13
/// A table of line start positions for a piece of text.
///
/// The table stores the `String.Index` at which every line of `content`
/// begins. A new line begins immediately after each `"\n"`, `"\r\n"` or `"\r"`
/// character, so the table always holds at least one entry (the start of the
/// string), even when `content` is empty.
public struct LineTable: Hashable {

  /// Start index of each line in `content`, in ascending order.
  @usableFromInline
  var impl: [String.Index]

  /// The text the table was built from.
  public var content: String

  /// Builds the line table for `string`.
  ///
  /// - parameter string: The text to index.
  public init(_ string: String) {
    content = string

    // The first line always starts at the beginning of the text.
    var lineStarts = [string.startIndex]
    for position in string.indices {
      switch string[position] {
      case "\n", "\r\n", "\r":
        // The next line starts right after the line terminator.
        lineStarts.append(string.index(after: position))
      default:
        break
      }
    }
    impl = lineStarts
  }

  /// The number of lines.
  @inlinable
  public var count: Int {
    return impl.count
  }

  /// Accesses the text of one line, including its trailing line terminator
  /// when there is one.
  ///
  /// - parameter line: Line number (zero-based).
  @inlinable
  public subscript(line: Int) -> Substring {
    let lineStart = impl[line]
    let lineEnd: String.Index
    if line == count - 1 {
      // The last line runs to the end of the text.
      lineEnd = content.endIndex
    } else {
      lineEnd = impl[line + 1]
    }
    return content[lineStart..<lineEnd]
  }

  /// Translates a `String.Index` into a line number and a UTF-16 column.
  ///
  /// - parameter index: Position inside `content` to translate.
  /// - parameter fromLine: First line to consider (zero-based); must be a
  ///   valid line number.
  /// - returns: The line found by the search and the UTF-16 distance from
  ///   that line's start to `index`.
  @usableFromInline
  func lineAndUTF16ColumnOf(_ index: String.Index, fromLine: Int = 0) -> (line: Int, utf16Column: Int) {
    precondition(0 <= fromLine && fromLine < count)

    // Bisect the half-open line range [low, high). The search stops when the
    // range has narrowed to a single line or a line start equals `index`.
    var low = fromLine
    var high = count
    var probe = low + (high - low) / 2
    while probe != low && impl[probe] != index {
      if impl[probe] < index {
        low = probe
      } else {
        high = probe
      }
      probe = low + (high - low) / 2
    }

    return (
      line: probe,
      utf16Column: content.utf16.distance(from: impl[probe], to: index)
    )
  }
}
130
69
@@ -157,8 +96,8 @@ extension LineTable {
157
96
utf16Offset toOff: Int ,
158
97
with replacement: String )
159
98
{
160
- let start = String . Index ( encodedOffset : self [ fromLine] . utf16Offset + fromOff)
161
- let end = String . Index ( encodedOffset : self [ toLine] . utf16Offset + toOff)
99
+ let start = content . utf16 . index ( impl [ fromLine] , offsetBy : fromOff)
100
+ let end = content . utf16 . index ( impl [ toLine] , offsetBy : toOff)
162
101
163
102
var newText = self . content
164
103
newText. replaceSubrange ( start..< end, with: replacement)
@@ -172,29 +111,81 @@ extension LineTable {
172
111
/// - parameter fromOff: Starting UTF-16 column offset (zero-based).
173
112
/// - parameter utf16Length: The number of UTF-16 code units to replace.
174
113
/// - parameter replacement: The new text for the given range.
175
- @inlinable
176
114
mutating public func replace(
  fromLine: Int,
  utf16Offset fromOff: Int,
  utf16Length: Int,
  with replacement: String)
{
  // Resolve the start position, then step `utf16Length` code units forward
  // to find where the replaced range ends.
  let rangeStart = content.utf16.index(impl[fromLine], offsetBy: fromOff)
  let rangeEnd = content.utf16.index(rangeStart, offsetBy: utf16Length)

  // Convert the end index back to a line/column pair, starting the line
  // search at `fromLine`.
  let endPosition = lineAndUTF16ColumnOf(rangeEnd, fromLine: fromLine)

  // Delegate to the line/column based overload.
  self.replace(
    fromLine: fromLine,
    utf16Offset: fromOff,
    toLine: endPosition.line,
    utf16Offset: endPosition.utf16Column,
    with: replacement)
}
187
125
}
188
126
189
- // Note: This is copied from the stdlib.
190
- // Used to calculate a running count. For non-BMP scalars, it's important if the
191
- // prior code unit was a leading surrogate (validity).
192
- private func _utf8Count( _ utf16CU: UInt16 , prev: UInt16 ) -> Int {
193
- switch utf16CU {
194
- case 0 ..< 0x80 : return 1
195
- case 0x80 ..< 0x800 : return 2
196
- case 0x800 ..< 0xDC00 : return 3
197
- case 0xDC00 ..< 0xE000 : return UTF16 . isLeadSurrogate ( prev) ? 1 : 3
198
- default : return 3
127
extension LineTable {

  // MARK: - Position translation

  /// Returns `String.Index` of given logical position.
  ///
  /// - parameter line: Line number (zero-based).
  /// - parameter utf16Column: UTF-16 column offset (zero-based).
  /// - returns: The index of the position, or `nil` when `line` or
  ///   `utf16Column` is negative or past the end of the table / the line.
  @inlinable
  public func stringIndexOf(line: Int, utf16Column: Int) -> String.Index? {
    guard 0 <= line, line < count else {
      // Line out of range.
      return nil
    }
    let lineSlice = self[line]
    let lineLength = content.utf16.distance(from: lineSlice.startIndex, to: lineSlice.endIndex)
    // A negative column would otherwise walk backwards into the previous line
    // (or trap on the first line), so reject it explicitly.
    guard 0 <= utf16Column, utf16Column <= lineLength else {
      // Column out of range.
      return nil
    }
    return content.utf16.index(lineSlice.startIndex, offsetBy: utf16Column)
  }

  /// Returns UTF8 buffer offset of given logical position.
  ///
  /// - parameter line: Line number (zero-based).
  /// - parameter utf16Column: UTF-16 column offset (zero-based).
  /// - returns: The UTF-8 offset from the start of `content`, or `nil` when
  ///   the position is out of range.
  @inlinable
  public func utf8OffsetOf(line: Int, utf16Column: Int) -> Int? {
    guard let stringIndex = stringIndexOf(line: line, utf16Column: utf16Column) else {
      return nil
    }
    return content.utf8.distance(from: content.startIndex, to: stringIndex)
  }

  /// Returns logical position of given source offset.
  ///
  /// - parameter utf8Offset: UTF-8 buffer offset (zero-based).
  /// - returns: The line and UTF-16 column of the offset, or `nil` when
  ///   `utf8Offset` is negative or greater than the UTF-8 length of `content`.
  @inlinable
  public func lineAndUTF16ColumnOf(utf8Offset: Int) -> (line: Int, utf16Column: Int)? {
    guard 0 <= utf8Offset, utf8Offset <= content.utf8.count else {
      // Offset out of range.
      return nil
    }
    return lineAndUTF16ColumnOf(content.utf8.index(content.startIndex, offsetBy: utf8Offset))
  }

  /// Returns UTF16 column offset at UTF8 version of logical position.
  ///
  /// - parameter line: Line number (zero-based).
  /// - parameter utf8Column: UTF-8 column offset (zero-based).
  /// - returns: The UTF-16 column that corresponds to `utf8Column` on the
  ///   given line, or `nil` when `line` or `utf8Column` is negative or past
  ///   the end of the table / the line.
  @inlinable
  public func utf16ColumnAt(line: Int, utf8Column: Int) -> Int? {
    guard 0 <= line, line < count else {
      // Line out of range.
      return nil
    }
    let lineSlice = self[line]
    let lineLength = content.utf8.distance(from: lineSlice.startIndex, to: lineSlice.endIndex)
    // Reject negative columns instead of offsetting backwards out of the line.
    guard 0 <= utf8Column, utf8Column <= lineLength else {
      // Column out of range.
      return nil
    }
    let targetIndex = content.utf8.index(lineSlice.startIndex, offsetBy: utf8Column)
    return content.utf16.distance(from: lineSlice.startIndex, to: targetIndex)
  }
}
0 commit comments