Skip to content

Commit 9f0b7ab

Browse files
authored
fix(ChangeStream): make CursorNotFound error resumable
NODE-2682
1 parent c1c69fc commit 9f0b7ab

File tree

6 files changed

+191
-22
lines changed

6 files changed

+191
-22
lines changed

lib/error.js

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ const GET_MORE_RESUMABLE_CODES = new Set([
2020
150, // StaleEpoch
2121
13388, // StaleConfig
2222
234, // RetryChangeStream
23-
133 // FailedToSatisfyReadPreference
23+
133, // FailedToSatisfyReadPreference
24+
43 // CursorNotFound
2425
]);
2526

2627
function isResumableError(error, wireVersion) {
@@ -29,6 +30,10 @@ function isResumableError(error, wireVersion) {
2930
}
3031

3132
if (wireVersion >= 9) {
33+
// DRIVERS-1308: For 4.4 drivers running against 4.4 servers, drivers will add a special case to treat the CursorNotFound error code as resumable
34+
if (error.code === 43) {
35+
return true;
36+
}
3237
return error.hasErrorLabel('ResumableChangeStreamError');
3338
}
3439

test/spec/change-stream/README.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,20 @@ Although synchronous drivers must provide a `non-blocking mode of iteration <../
149149

150150
If the test expects an error and one was not thrown by either creating the change stream or executing the test's operations, iterating the change stream once allows for an error to be thrown by a ``getMore`` command. If the test does not expect any error, the change stream should be iterated only until it returns as many result documents as are expected by the test.
151151

152+
Testing on Sharded Clusters
153+
---------------------------
154+
155+
When writing data on sharded clusters, majority-committed data does not always show up in the response of the first
156+
``getMore`` command after the data is written. This is because in sharded clusters, no data from shard A may be returned
157+
until all other shard reports an entry that sorts after the change in shard A.
158+
159+
To account for this, drivers MUST NOT rely on change stream documents in certain batches. For example, if expecting two
160+
documents in a change stream, these may not be part of the same ``getMore`` response, or even be produced in two
161+
subsequent ``getMore`` responses. Drivers MUST allow for a ``getMore`` to produce empty batches when testing on a
162+
sharded cluster. By default, this can take up to 10 seconds, but can be controlled by enabling the ``writePeriodicNoops``
163+
server parameter and configuring the ``periodNoopIntervalSecs`` parameter. Choosing lower values allows for running
164+
change stream tests with smaller timeouts.
165+
152166
Prose Tests
153167
===========
154168

test/spec/change-stream/change-streams-errors.json

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -102,15 +102,12 @@
102102
],
103103
"result": {
104104
"error": {
105-
"code": 280,
106-
"errorLabels": [
107-
"NonResumableChangeStreamError"
108-
]
105+
"code": 280
109106
}
110107
}
111108
},
112109
{
113-
"description": "change stream errors on MaxTimeMSExpired",
110+
"description": "change stream errors on ElectionInProgress",
114111
"minServerVersion": "4.2",
115112
"failPoint": {
116113
"configureFailPoint": "failCommand",
@@ -121,7 +118,7 @@
121118
"failCommands": [
122119
"getMore"
123120
],
124-
"errorCode": 50,
121+
"errorCode": 216,
125122
"closeConnection": false
126123
}
127124
},
@@ -130,13 +127,7 @@
130127
"replicaset",
131128
"sharded"
132129
],
133-
"changeStreamPipeline": [
134-
{
135-
"$project": {
136-
"_id": 0
137-
}
138-
}
139-
],
130+
"changeStreamPipeline": [],
140131
"changeStreamOptions": {},
141132
"operations": [
142133
{
@@ -152,7 +143,7 @@
152143
],
153144
"result": {
154145
"error": {
155-
"code": 50
146+
"code": 216
156147
}
157148
}
158149
}

test/spec/change-stream/change-streams-errors.yml

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,24 +72,21 @@ tests:
7272
result:
7373
error:
7474
code: 280
75-
errorLabels: [ "NonResumableChangeStreamError" ]
7675
-
77-
description: change stream errors on MaxTimeMSExpired
76+
description: change stream errors on ElectionInProgress
7877
minServerVersion: "4.2"
7978
failPoint:
8079
configureFailPoint: failCommand
8180
mode: { times: 1 }
8281
data:
8382
failCommands: ["getMore"]
84-
errorCode: 50 # An error code that's not on the old blacklist or whitelist
83+
errorCode: 216 # An error code that's not on the old blacklist or whitelist
8584
closeConnection: false
8685
target: collection
8786
topology:
8887
- replicaset
8988
- sharded
90-
changeStreamPipeline:
91-
-
92-
$project: { _id: 0 }
89+
changeStreamPipeline: []
9390
changeStreamOptions: {}
9491
operations:
9592
-
@@ -101,4 +98,4 @@ tests:
10198
z: 3
10299
result:
103100
error:
104-
code: 50
101+
code: 216

test/spec/change-stream/change-streams-resume-whitelist.json

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1648,6 +1648,102 @@
16481648
}
16491649
]
16501650
}
1651+
},
1652+
{
1653+
"description": "change stream resumes after CursorNotFound",
1654+
"minServerVersion": "4.2",
1655+
"failPoint": {
1656+
"configureFailPoint": "failCommand",
1657+
"mode": {
1658+
"times": 1
1659+
},
1660+
"data": {
1661+
"failCommands": [
1662+
"getMore"
1663+
],
1664+
"errorCode": 43,
1665+
"closeConnection": false
1666+
}
1667+
},
1668+
"target": "collection",
1669+
"topology": [
1670+
"replicaset",
1671+
"sharded"
1672+
],
1673+
"changeStreamPipeline": [],
1674+
"changeStreamOptions": {},
1675+
"operations": [
1676+
{
1677+
"database": "change-stream-tests",
1678+
"collection": "test",
1679+
"name": "insertOne",
1680+
"arguments": {
1681+
"document": {
1682+
"x": 1
1683+
}
1684+
}
1685+
}
1686+
],
1687+
"expectations": [
1688+
{
1689+
"command_started_event": {
1690+
"command": {
1691+
"aggregate": "test",
1692+
"cursor": {},
1693+
"pipeline": [
1694+
{
1695+
"$changeStream": {}
1696+
}
1697+
]
1698+
},
1699+
"command_name": "aggregate",
1700+
"database_name": "change-stream-tests"
1701+
}
1702+
},
1703+
{
1704+
"command_started_event": {
1705+
"command": {
1706+
"getMore": 42,
1707+
"collection": "test"
1708+
},
1709+
"command_name": "getMore",
1710+
"database_name": "change-stream-tests"
1711+
}
1712+
},
1713+
{
1714+
"command_started_event": {
1715+
"command": {
1716+
"aggregate": "test",
1717+
"cursor": {},
1718+
"pipeline": [
1719+
{
1720+
"$changeStream": {}
1721+
}
1722+
]
1723+
},
1724+
"command_name": "aggregate",
1725+
"database_name": "change-stream-tests"
1726+
}
1727+
}
1728+
],
1729+
"result": {
1730+
"success": [
1731+
{
1732+
"_id": "42",
1733+
"documentKey": "42",
1734+
"operationType": "insert",
1735+
"ns": {
1736+
"db": "change-stream-tests",
1737+
"coll": "test"
1738+
},
1739+
"fullDocument": {
1740+
"x": {
1741+
"$numberInt": "1"
1742+
}
1743+
}
1744+
}
1745+
]
1746+
}
16511747
}
16521748
]
16531749
}

test/spec/change-stream/change-streams-resume-whitelist.yml

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,3 +1105,69 @@ tests:
11051105
fullDocument:
11061106
x:
11071107
$numberInt: "1"
1108+
-
1109+
# CursorNotFound is special-cased to be resumable regardless of server versions or error labels, so this test has
1110+
# no maxWireVersion.
1111+
description: "change stream resumes after CursorNotFound"
1112+
minServerVersion: "4.2"
1113+
failPoint:
1114+
configureFailPoint: failCommand
1115+
mode: { times: 1 }
1116+
data:
1117+
failCommands: ["getMore"]
1118+
errorCode: 43
1119+
closeConnection: false
1120+
target: collection
1121+
topology:
1122+
- replicaset
1123+
- sharded
1124+
changeStreamPipeline: []
1125+
changeStreamOptions: {}
1126+
operations:
1127+
-
1128+
database: *database_name
1129+
collection: *collection_name
1130+
name: insertOne
1131+
arguments:
1132+
document:
1133+
x: 1
1134+
expectations:
1135+
-
1136+
command_started_event:
1137+
command:
1138+
aggregate: *collection_name
1139+
cursor: {}
1140+
pipeline:
1141+
-
1142+
$changeStream: {}
1143+
command_name: aggregate
1144+
database_name: *database_name
1145+
-
1146+
command_started_event:
1147+
command:
1148+
getMore: 42
1149+
collection: *collection_name
1150+
command_name: getMore
1151+
database_name: *database_name
1152+
-
1153+
command_started_event:
1154+
command:
1155+
aggregate: *collection_name
1156+
cursor: {}
1157+
pipeline:
1158+
-
1159+
$changeStream: {}
1160+
command_name: aggregate
1161+
database_name: *database_name
1162+
result:
1163+
success:
1164+
-
1165+
_id: "42"
1166+
documentKey: "42"
1167+
operationType: insert
1168+
ns:
1169+
db: *database_name
1170+
coll: *collection_name
1171+
fullDocument:
1172+
x:
1173+
$numberInt: "1"

0 commit comments

Comments
 (0)