Skip to content

Surface more errors on failing to initially retrieve a RT #811

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,15 @@ const {
}
} = internal

const UNAUTHORIZED_ERROR_CODE = 'Neo.ClientError.Security.Unauthorized'
const DATABASE_NOT_FOUND_ERROR_CODE =
'Neo.ClientError.Database.DatabaseNotFound'

const PROCEDURE_NOT_FOUND_CODE = 'Neo.ClientError.Procedure.ProcedureNotFound'
const DATABASE_NOT_FOUND_CODE = 'Neo.ClientError.Database.DatabaseNotFound'
const INVALID_BOOKMARK_CODE = 'Neo.ClientError.Transaction.InvalidBookmark'
const INVALID_BOOKMARK_MIXTURE_CODE =
'Neo.ClientError.Transaction.InvalidBookmarkMixture'
const AUTHORIZATION_EXPIRED_CODE =
'Neo.ClientError.Security.AuthorizationExpired'

const SYSTEM_DB_NAME = 'system'
const DEFAULT_DB_NAME = null
const DEFAULT_ROUTING_TABLE_PURGE_DELAY = int(30000)
Expand Down Expand Up @@ -482,14 +488,7 @@ export default class RoutingConnectionProvider extends PooledConnectionProvider
impersonatedUser
)
} catch (error) {
if (error && error.code === DATABASE_NOT_FOUND_ERROR_CODE) {
// not finding the target database is a sign of a configuration issue
throw error
}
this._log.warn(
`unable to fetch routing table because of an error ${error}`
)
return null
return this._handleRediscoveryError(error, currentRouter)
} finally {
session.close()
}
Expand Down Expand Up @@ -532,14 +531,24 @@ export default class RoutingConnectionProvider extends PooledConnectionProvider
impersonatedUser
})
} catch (error) {
// unable to acquire connection towards the given router
if (error && error.code === UNAUTHORIZED_ERROR_CODE) {
// auth error and not finding system database is a sign of a configuration issue
// discovery should not proceed
throw error
}
return null
return this._handleRediscoveryError(error, routerAddress)
}
}

_handleRediscoveryError(error, routerAddress) {
if (_isFailFastError(error) || _isFailFastSecurityError(error)) {
throw error
} else if (error.code === PROCEDURE_NOT_FOUND_CODE) {
// throw when getServers procedure not found because this is clearly a configuration issue
throw newError(
`Server at ${routerAddress.asHostPort()} can't perform routing. Make sure you are connecting to a causal cluster`,
SERVICE_UNAVAILABLE
)
}
this._log.warn(
`unable to fetch routing table because of an error ${error}`
)
return null
}

async _applyRoutingTableIfPossible (currentRoutingTable, newRoutingTable, onDatabaseNameResolved) {
Expand Down Expand Up @@ -676,3 +685,19 @@ class RoutingTableRegistry {
return this
}
}

function _isFailFastError (error) {
return [
DATABASE_NOT_FOUND_CODE,
INVALID_BOOKMARK_CODE,
INVALID_BOOKMARK_MIXTURE_CODE,
].includes(error.code)
}

function _isFailFastSecurityError (error) {
return error.code.startsWith('Neo.ClientError.Security.') &&
![
AUTHORIZATION_EXPIRED_CODE,
].includes(error.code)
}

25 changes: 2 additions & 23 deletions packages/bolt-connection/src/rediscovery/rediscovery.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,7 @@
* limitations under the License.
*/
import RoutingTable from './routing-table'
import { RawRoutingTable } from '../bolt'
import { newError, error, Session } from 'neo4j-driver-core'

const { SERVICE_UNAVAILABLE } = error
const PROCEDURE_NOT_FOUND_CODE = 'Neo.ClientError.Procedure.ProcedureNotFound'
const DATABASE_NOT_FOUND_CODE = 'Neo.ClientError.Database.DatabaseNotFound'
import { Session } from 'neo4j-driver-core'

export default class Rediscovery {
/**
Expand Down Expand Up @@ -75,23 +70,7 @@ export default class Rediscovery {
afterComplete: session._onComplete
},
onCompleted: resolve,
onError: error => {
if (error.code === DATABASE_NOT_FOUND_CODE) {
reject(error)
} else if (error.code === PROCEDURE_NOT_FOUND_CODE) {
// throw when getServers procedure not found because this is clearly a configuration issue
reject(
newError(
`Server at ${routerAddress.asHostPort()} can't perform routing. Make sure you are connecting to a causal cluster`,
SERVICE_UNAVAILABLE
)
)
} else {
// return nothing when failed to connect because code higher in the callstack is still able to retry with a
// different session towards a different router
resolve(RawRoutingTable.ofNull())
}
}
onError: reject,
})
})
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1658,6 +1658,86 @@ describe('#unit RoutingConnectionProvider', () => {
})
}, 10000)

describe('when rediscovery.lookupRoutingTableOnRouter fails', () => {
describe.each([
'Neo.ClientError.Database.DatabaseNotFound',
'Neo.ClientError.Transaction.InvalidBookmark',
'Neo.ClientError.Transaction.InvalidBookmarkMixture',
'Neo.ClientError.Security.Forbidden',
'Neo.ClientError.Security.IWontTellYou'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A prime example of security by obscurity 😜

])('with "%s"', errorCode => {
it('should fail without changing the error ', async () => {
const error = newError('something wrong', errorCode)
const connectionProvider = newRoutingConnectionProviderWithFakeRediscovery(
new FakeRediscovery(null, error),
newPool()
)

let completed = false
try {
await connectionProvider.acquireConnection({ accessMode: READ })
completed = true
} catch (capturedError) {
expect(capturedError).toBe(error)
}

expect(completed).toBe(false)
})
})

describe('with "Neo.ClientError.Procedure.ProcedureNotFound"', () => {
it('should fail with SERVICE_UNAVAILABLE and a message about the need for a causal cluster', async () => {
const error = newError('something wrong', 'Neo.ClientError.Procedure.ProcedureNotFound')
const connectionProvider = newRoutingConnectionProviderWithFakeRediscovery(
new FakeRediscovery(null, error),
newPool()
)

let completed = false
try {
await connectionProvider.acquireConnection({ accessMode: READ })
completed = true
} catch (capturedError) {
expect(capturedError.code).toBe(SERVICE_UNAVAILABLE)
expect(capturedError.message).toBe(
'Server at server-non-existing-seed-router:7687 can\'t '
+ 'perform routing. Make sure you are connecting to a causal cluster'
)
}

expect(completed).toBe(false)
})
})

describe.each([
'Neo.ClientError.Security.AuthorizationExpired',
'Neo.ClientError.MadeUp.Error'
])('with "%s"', errorCode => {
it('should fail with SERVICE_UNAVAILABLE and message about no routing servers available', async () => {
const error = newError('something wrong', errorCode)
const connectionProvider = newRoutingConnectionProviderWithFakeRediscovery(
new FakeRediscovery(null, error),
newPool()
)

let completed = false
try {
await connectionProvider.acquireConnection({ accessMode: READ })
completed = true
} catch (capturedError) {
expect(capturedError.code).toBe(SERVICE_UNAVAILABLE)
expect(capturedError.message).toEqual(expect.stringContaining(
'Could not perform discovery. ' +
'No routing servers available. Known routing table: '
))
}

expect(completed).toBe(false)
})

})
})

describe('multi-database', () => {
it.each(usersDataSet)('should acquire read connection from correct routing table [user=%s]', async (user) => {
const pool = newPool()
Expand Down Expand Up @@ -2843,13 +2923,31 @@ function newRoutingConnectionProvider (
)
}

function newRoutingConnectionProviderWithFakeRediscovery (
fakeRediscovery,
pool = null,
routerToRoutingTable = { null: {} }
) {
const seedRouter = ServerAddress.fromUrl('server-non-existing-seed-router')
return newRoutingConnectionProviderWithSeedRouter(
seedRouter,
[seedRouter],
[],
routerToRoutingTable,
pool,
null,
fakeRediscovery
)
}

function newRoutingConnectionProviderWithSeedRouter (
seedRouter,
seedRouterResolved,
routingTables,
routerToRoutingTable = { null: {} },
connectionPool = null,
routingTablePurgeDelay = null
routingTablePurgeDelay = null,
fakeRediscovery = null
) {
const pool = connectionPool || newPool()
const connectionProvider = new RoutingConnectionProvider({
Expand All @@ -2865,7 +2963,8 @@ function newRoutingConnectionProviderWithSeedRouter (
routingTables.forEach(r => {
connectionProvider._routingTableRegistry.register(r)
})
connectionProvider._rediscovery = new FakeRediscovery(routerToRoutingTable)
connectionProvider._rediscovery =
fakeRediscovery || new FakeRediscovery(routerToRoutingTable)
connectionProvider._hostNameResolver = new FakeDnsResolver(seedRouterResolved)
connectionProvider._useSeedRouter = routingTables.every(
r => r.expirationTime !== Integer.ZERO
Expand Down Expand Up @@ -2999,11 +3098,15 @@ class FakeConnection extends Connection {
}

class FakeRediscovery {
constructor (routerToRoutingTable) {
constructor (routerToRoutingTable, error) {
this._routerToRoutingTable = routerToRoutingTable
this._error = error
}

lookupRoutingTableOnRouter (ignored, database, router, user) {
if (this._error) {
return Promise.reject(this._error)
}
const table = this._routerToRoutingTable[database || null]
if (table) {
let routingTables = table[router.asKey()]
Expand Down
42 changes: 0 additions & 42 deletions packages/bolt-connection/test/rediscovery/rediscovery.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -136,48 +136,6 @@ describe('#unit Rediscovery', () => {
}
})

it('should reject with PROCEDURE_NOT_FOUND_CODE when it happens ', async () => {
const routerAddress = ServerAddress.fromUrl('bolt://localhost:1235')
const expectedError = newError(
`Server at ${routerAddress.asHostPort()} can't perform routing. Make sure you are connecting to a causal cluster`,
SERVICE_UNAVAILABLE
)
try {
const initialAddress = '127.0.0.1'
const routingContext = { context: '1234 ' }

const connection = new FakeConnection().withRequestRoutingInformationMock(
fakeOnError(newError('1de', PROCEDURE_NOT_FOUND_CODE))
)
await lookupRoutingTableOnRouter({
initialAddress,
routingContext,
connection,
routerAddress
})

fail('it should fail')
} catch (error) {
expect(error).toEqual(expectedError)
}
})

it('should return null when it happens an unexpected error ocorrus', async () => {
const initialAddress = '127.0.0.1'
const routingContext = { context: '1234 ' }

const connection = new FakeConnection().withRequestRoutingInformationMock(
fakeOnError(newError('1de', 'abc'))
)
const routingTable = await lookupRoutingTableOnRouter({
initialAddress,
routingContext,
connection
})

expect(routingTable).toEqual(null)
})

it('should throw PROTOCOL_ERROR if the routing table is invalid', async () => {
runWithClockAt(Date.now(), async () => {
try {
Expand Down
23 changes: 16 additions & 7 deletions packages/neo4j-driver/test/driver.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -398,14 +398,23 @@ describe('#integration driver', () => {
)
const session = driver.session()

await expectAsync(session.run('RETURN 1')).toBeRejectedWith(
jasmine.objectContaining({
code: neo4j.error.SERVICE_UNAVAILABLE,
message: jasmine.stringMatching(/No routing servers available/)
})
)
jasmine.objectContaining('')

await session.close()
let completed = false
try {
await session.run('RETURN 1')
completed = true
} catch (error) {
expect(error.code).toEqual(neo4j.error.SERVICE_UNAVAILABLE)
expect(error.message).toEqual(
`Server at ${sharedNeo4j.hostname}:7687 can't perform routing. ` +
'Make sure you are connecting to a causal cluster'
)
} finally {
await session.close()
}

expect(completed).toBe(false)
})

it('should have correct user agent', async () => {
Expand Down
18 changes: 9 additions & 9 deletions packages/testkit-backend/src/skipped-tests/common.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,6 @@ const skippedTests = [
ifEquals('neo4j.test_session_run.TestSessionRun.test_iteration_smaller_than_fetch_size'),
ifEquals('neo4j.test_tx_func_run.TestTxFuncRun.test_tx_func_configuration')
),
skip(
'Fail while enable Temporary:FastFailingDiscovery',
ifEndsWith('test_should_request_rt_from_all_initial_routers_until_successful_on_authorization_expired'),
ifEndsWith('test_should_request_rt_from_all_initial_routers_until_successful_on_unknown_failure'),
ifEndsWith('test_should_fail_with_routing_failure_on_any_security_discovery_failure'),
ifEndsWith('test_should_fail_with_routing_failure_on_invalid_bookmark_mixture_discovery_failure'),
ifEndsWith('test_should_fail_with_routing_failure_on_invalid_bookmark_discovery_failure'),
ifEndsWith('test_should_fail_with_routing_failure_on_forbidden_discovery_failure')
),
Comment on lines -24 to -32
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎉

skip('Flacky because sometimes the connection is actually available',
ifEndsWith('test_should_enforce_pool_size_per_cluster_member')
),
Expand Down Expand Up @@ -108,6 +99,15 @@ const skippedTests = [
'test_should_use_resolver_during_rediscovery_when_existing_routers_fail'
)
),
skip(
'Needs to implement "domain_name_resolver_fn"',
ifEndsWith(
'test_should_request_rt_from_all_initial_routers_until_successful_on_unknown_failure',
),
ifEndsWith(
'test_should_request_rt_from_all_initial_routers_until_successful_on_authorization_expired'
)
),
skip(
'Needs investigation. It is only failing in the RoutingV3 case',
ifEndsWith(
Expand Down