Skip to content

Commit 202826a

Browse files
authored
Merge pull request swiftlang#14 from rintaro/linetable-sr9311
[SKSupport.LineTable] Fix UTF16 offset calculation
2 parents 23394dd + c97a244 commit 202826a

File tree

4 files changed

+195
-144
lines changed

4 files changed

+195
-144
lines changed

Sources/SKSupport/LineTable.swift

Lines changed: 100 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -11,120 +11,59 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
public struct LineTable: Hashable {
14-
15-
public struct Line: Hashable {
16-
17-
/// The zero-based line number.
18-
public var index: Int
19-
20-
/// The UTF-8 byte offset of the start of the line.
21-
public var utf8Offset: Int
22-
23-
/// The UTF-16 code-unit offset of the start of the line.
24-
public var utf16Offset: Int { return content.startIndex.encodedOffset }
25-
26-
/// The content of the line, including the newline.
27-
public var content: Substring
28-
29-
@inlinable
30-
public init(index: Int, utf8Offset: Int, content: Substring) {
31-
self.index = index
32-
self.utf8Offset = utf8Offset
33-
self.content = content
34-
}
35-
}
36-
37-
@usableFromInline
38-
struct LineData: Hashable {
39-
@usableFromInline
40-
var stringIndex: String.Index
41-
@usableFromInline
42-
var utf8Offset: Int
43-
}
44-
4514
@usableFromInline
46-
var impl: [LineData]
15+
var impl: [String.Index]
4716

4817
public var content: String
4918

5019
public init(_ string: String) {
5120
content = string
5221

53-
if content.isEmpty {
54-
impl = [LineData(stringIndex: content.startIndex, utf8Offset: 0)]
55-
return
56-
}
57-
5822
var i = string.startIndex
59-
var utf8Offset = 0
60-
var prevUTF16: UInt16 = 0
61-
62-
impl = [LineData(stringIndex: i, utf8Offset: utf8Offset)]
63-
64-
let utf16 = string.utf16
65-
23+
impl = [i]
6624
while i != string.endIndex {
67-
let next = utf16.index(after: i)
68-
69-
let c = utf16[i]
70-
utf8Offset += _utf8Count(c, prev: prevUTF16)
71-
prevUTF16 = c
72-
73-
if c == /*newline*/10 {
74-
impl.append(LineData(stringIndex: next, utf8Offset: utf8Offset))
25+
let c = string[i]
26+
string.formIndex(after: &i)
27+
if c == "\n" || c == "\r\n" || c == "\r" {
28+
impl.append(i)
7529
}
76-
77-
i = next
7830
}
7931
}
8032

8133
/// The number of lines.
8234
@inlinable
8335
public var count: Int { return impl.count }
8436

85-
/// Returns the given (zero-based) line.
86-
@inlinable
87-
public subscript(_ line: Int) -> Line {
88-
let data = impl[line]
89-
return Line(
90-
index: line,
91-
utf8Offset: data.utf8Offset,
92-
content: content[data.stringIndex..<nextLineStart(line)]
93-
)
94-
}
95-
96-
/// Returns the line containing the given UTF-8 byte offset.
37+
/// Returns the given (zero-based) line as a Substring, including the newline.
38+
///
39+
/// - parameter line: Line number (zero-based).
9740
@inlinable
98-
public subscript(utf8Offset offset: Int) -> Line {
99-
// FIXME: binary search
100-
for (i, data) in impl.enumerated() {
101-
if data.utf8Offset > offset {
102-
assert(i > 0)
103-
return self[i - 1]
104-
}
105-
}
106-
return self[count - 1]
41+
public subscript(line: Int) -> Substring {
42+
return content[impl[line] ..< (line == count - 1 ? content.endIndex : impl[line + 1])]
10743
}
10844

109-
@inlinable
110-
public subscript(utf16Offset offset: Int) -> Line {
111-
// FIXME: binary search
112-
for (i, data) in impl.enumerated() {
113-
if data.stringIndex.encodedOffset > offset {
114-
assert(i > 0)
115-
return self[i - 1]
45+
/// Translate String.Index to logical line/utf16 pair.
46+
@usableFromInline
47+
func lineAndUTF16ColumnOf(_ index: String.Index, fromLine: Int = 0) -> (line: Int, utf16Column: Int) {
48+
precondition(0 <= fromLine && fromLine < count)
49+
50+
// Binary search.
51+
var lower = fromLine
52+
var upper = count
53+
while true {
54+
let mid = lower + (upper - lower) / 2
55+
let lineStartIndex = impl[mid]
56+
if mid == lower || lineStartIndex == index {
57+
return (
58+
line: mid,
59+
utf16Column: content.utf16.distance(from: lineStartIndex, to: index)
60+
)
61+
} else if lineStartIndex < index {
62+
lower = mid
63+
} else {
64+
upper = mid
11665
}
11766
}
118-
return self[count - 1]
119-
}
120-
121-
@inlinable
122-
func nextLineStart(_ line: Int) -> String.Index {
123-
if line == count - 1 {
124-
return content.endIndex
125-
} else {
126-
return impl[line + 1].stringIndex
127-
}
12867
}
12968
}
13069

@@ -157,8 +96,8 @@ extension LineTable {
15796
utf16Offset toOff: Int,
15897
with replacement: String)
15998
{
160-
let start = String.Index(encodedOffset: self[fromLine].utf16Offset + fromOff)
161-
let end = String.Index(encodedOffset: self[toLine].utf16Offset + toOff)
99+
let start = content.utf16.index(impl[fromLine], offsetBy: fromOff)
100+
let end = content.utf16.index(impl[toLine], offsetBy: toOff)
162101

163102
var newText = self.content
164103
newText.replaceSubrange(start..<end, with: replacement)
@@ -172,29 +111,81 @@ extension LineTable {
172111
/// - parameter fromOff: Starting UTF-16 column offset (zero-based).
173112
/// - parameter utf16Length: The number of UTF-16 code units to replace.
174113
/// - parameter replacement: The new text for the given range.
175-
@inlinable
176114
mutating public func replace(
177115
fromLine: Int,
178116
utf16Offset fromOff: Int,
179117
utf16Length: Int,
180118
with replacement: String)
181119
{
182-
let endOff = self[fromLine].utf16Offset + fromOff + utf16Length
183-
let endLine = self[utf16Offset: endOff]
184-
185-
self.replace(fromLine: fromLine, utf16Offset: fromOff, toLine: endLine.index, utf16Offset: endOff - endLine.utf16Offset, with: replacement)
120+
let start = content.utf16.index(impl[fromLine], offsetBy: fromOff)
121+
let end = content.utf16.index(start, offsetBy: utf16Length)
122+
let (toLine, toOff) = lineAndUTF16ColumnOf(end, fromLine: fromLine)
123+
self.replace(fromLine: fromLine, utf16Offset: fromOff, toLine: toLine, utf16Offset: toOff, with: replacement)
186124
}
187125
}
188126

189-
// Note: This is copied from the stdlib.
190-
// Used to calculate a running count. For non-BMP scalars, it's important if the
191-
// prior code unit was a leading surrogate (validity).
192-
private func _utf8Count(_ utf16CU: UInt16, prev: UInt16) -> Int {
193-
switch utf16CU {
194-
case 0..<0x80: return 1
195-
case 0x80..<0x800: return 2
196-
case 0x800..<0xDC00: return 3
197-
case 0xDC00..<0xE000: return UTF16.isLeadSurrogate(prev) ? 1 : 3
198-
default: return 3
127+
extension LineTable {
128+
129+
// MARK: - Position translation
130+
131+
/// Returns `String.Index` of given logical position.
132+
///
133+
/// - parameter line: Line number (zero-based).
134+
/// - parameter utf16Column: UTF-16 column offset (zero-based).
135+
@inlinable
136+
public func stringIndexOf(line: Int, utf16Column: Int) -> String.Index? {
137+
guard line < count else {
138+
// Line out of range.
139+
return nil
140+
}
141+
let lineSlice = self[line]
142+
guard utf16Column <= content.utf16.distance(from: lineSlice.startIndex, to: lineSlice.endIndex) else {
143+
// Column out of range.
144+
return nil
145+
}
146+
return content.utf16.index(lineSlice.startIndex, offsetBy: utf16Column)
147+
}
148+
149+
/// Returns UTF8 buffer offset of given logical position.
150+
///
151+
/// - parameter line: Line number (zero-based).
152+
/// - parameter utf16Column: UTF-16 column offset (zero-based).
153+
@inlinable
154+
public func utf8OffsetOf(line: Int, utf16Column: Int) -> Int? {
155+
guard let stringIndex = stringIndexOf(line: line, utf16Column: utf16Column) else {
156+
return nil
157+
}
158+
return content.utf8.distance(from: content.startIndex, to: stringIndex)
159+
}
160+
161+
/// Returns logical position of given source offset.
162+
///
163+
/// - parameter utf8Offset: UTF-8 buffer offset (zero-based).
164+
@inlinable
165+
public func lineAndUTF16ColumnOf(utf8Offset: Int) -> (line: Int, utf16Column: Int)? {
166+
guard utf8Offset <= content.utf8.count else {
167+
// Offset out of range.
168+
return nil
169+
}
170+
return lineAndUTF16ColumnOf(content.utf8.index(content.startIndex, offsetBy: utf8Offset))
171+
}
172+
173+
/// Returns the UTF-16 column offset equivalent to the given UTF-8 column on the given line.
174+
///
175+
/// - parameter line: Line number (zero-based).
176+
/// - parameter utf8Column: UTF-8 column offset (zero-based).
177+
@inlinable
178+
public func utf16ColumnAt(line: Int, utf8Column: Int) -> Int? {
179+
guard line < count else {
180+
// Line out of range.
181+
return nil
182+
}
183+
let lineSlice = self[line]
184+
guard utf8Column <= content.utf8.distance(from: lineSlice.startIndex, to: lineSlice.endIndex) else {
185+
// Column out of range
186+
return nil
187+
}
188+
let targetIndex = content.utf8.index(lineSlice.startIndex, offsetBy: utf8Column)
189+
return content.utf16.distance(from: lineSlice.startIndex, to: targetIndex)
199190
}
200191
}

Sources/SourceKit/DocumentManager.swift

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,7 @@ public struct DocumentSnapshot {
2727
}
2828

2929
func index(of pos: Position) -> String.Index? {
30-
// FIXME: TEST ME
31-
guard pos.line < lineTable.count else { return nil }
32-
let lineData = lineTable[pos.line]
33-
let utf16Offset = lineData.utf16Offset + pos.utf16index
34-
return String.Index(encodedOffset: utf16Offset)
30+
return lineTable.stringIndexOf(line: pos.line, utf16Column: pos.utf16index)
3531
}
3632
}
3733

Sources/SourceKit/sourcekitd/SwiftLanguageServer.swift

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -552,31 +552,19 @@ extension SwiftLanguageServer {
552552
extension DocumentSnapshot {
553553

554554
func utf8Offset(of pos: Position) -> Int? {
555-
// FIXME: TEST ME
556-
guard pos.line < lineTable.count else { return nil }
557-
let lineData = lineTable[pos.line]
558-
let utf16Offset = lineData.utf16Offset + pos.utf16index
559-
let index = String.Index(encodedOffset: utf16Offset)
560-
guard index <= lineData.content.endIndex else { return nil }
561-
return lineData.utf8Offset + lineData.content.prefix(upTo: index).utf8.count
555+
return lineTable.utf8OffsetOf(line: pos.line, utf16Column: pos.utf16index)
562556
}
563557

564558
func positionOf(utf8Offset: Int) -> Position? {
565-
// FIXME: TEST ME
566-
let line = lineTable[utf8Offset: utf8Offset]
567-
let column = utf8Offset - line.utf8Offset
568-
return positionOf(zeroBasedLine: line.index, utf8Column: column)
559+
return lineTable.lineAndUTF16ColumnOf(utf8Offset: utf8Offset).map {
560+
Position(line: $0.line, utf16index: $0.utf16Column)
561+
}
569562
}
570563

571564
func positionOf(zeroBasedLine: Int, utf8Column: Int) -> Position? {
572-
// FIXME: TEST ME
573-
guard zeroBasedLine < lineTable.count else { return nil }
574-
let lineData = lineTable[zeroBasedLine]
575-
let index = lineData.content.dropFirst(utf8Column).startIndex
576-
return Position(
577-
line: zeroBasedLine,
578-
utf16index: index.encodedOffset - lineData.utf16Offset
579-
)
565+
return lineTable.utf16ColumnAt(line: zeroBasedLine, utf8Column: utf8Column).map {
566+
Position(line: zeroBasedLine, utf16index: $0)
567+
}
580568
}
581569
}
582570

0 commit comments

Comments
 (0)