Closed
Description
JedisClusterTopologyProvider uses a cached object to optimize performance on frequent cluster requests, but the time value is updated before the cached object. This creates a race condition in which the old cached object, containing an invalid cluster topology, might get returned, which results in a ClusterCommandExecutionFailureException: Could not get a resource from the pool.
/**
 * Returns the cluster topology, preferring the cached instance when
 * {@code shouldUseCachedValue()} reports that it may still be used.
 * <p>
 * Otherwise the known cluster nodes are tried in random order; the first node that answers
 * {@code CLUSTER NODES} supplies the new topology, which is then cached and returned.
 * <p>
 * The refresh timestamp is written only after the new topology has been stored. Writing it
 * earlier would let a concurrent caller see the new timestamp together with the previous
 * topology, and would also mark the previous topology as fresh when the refresh attempt fails.
 *
 * @return the cached or newly retrieved cluster topology
 * @throws ClusterStateFailureException if none of the known nodes could provide the topology;
 *         the message lists the failure reported for each node
 */
public ClusterTopology getTopology() {

    // Read the field once so the null check and the returned value refer to the same instance.
    ClusterTopology current = cached;
    if (current != null && shouldUseCachedValue()) {
        return current;
    }

    Map<String, Exception> errors = new LinkedHashMap<>();

    // Shuffle so that repeated refreshes do not always hit the same node first.
    List<Entry<String, ConnectionPool>> list = new ArrayList<>(cluster.getClusterNodes().entrySet());
    Collections.shuffle(list);

    for (Entry<String, ConnectionPool> entry : list) {
        try (Connection connection = entry.getValue().getResource()) {

            Set<RedisClusterNode> nodes = Converters.toSetOfRedisClusterNodes(new Jedis(connection).clusterNodes());
            ClusterTopology topology = new ClusterTopology(nodes);

            // Publish the topology first, then the timestamp (see method documentation).
            // NOTE(review): assumes shouldUseCachedValue() derives freshness from 'time' - confirm.
            cached = topology;
            time = System.currentTimeMillis();

            return topology;
        } catch (Exception ex) {
            // Remember the failure and fall through to the next node.
            errors.put(entry.getKey(), ex);
        }
    }

    StringBuilder stringBuilder = new StringBuilder();
    for (Entry<String, Exception> entry : errors.entrySet()) {
        stringBuilder.append(String.format("\r\n\t- %s failed: %s", entry.getKey(), entry.getValue().getMessage()));
    }

    throw new ClusterStateFailureException(
            "Could not retrieve cluster information; CLUSTER NODES returned with error" + stringBuilder);
}