From e3543a1337fded40c83a7407f4b7367be1e35299 Mon Sep 17 00:00:00 2001 From: Alejandro Aristizabal Date: Thu, 26 Sep 2024 17:23:28 -0500 Subject: [PATCH] Fix issue with sampling without replacement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this fix, uniform sampling was not guaranteed: the first combination of neighbors was over-represented, and the last possible combination was never sampled. Co-authored-by: Jaime Mosquera Gutiérrez Co-authored-by: Andres Uriza --- csrc/cpu/neighbor_sample_cpu.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/csrc/cpu/neighbor_sample_cpu.cpp b/csrc/cpu/neighbor_sample_cpu.cpp index e300ca11..e5ec2b09 100644 --- a/csrc/cpu/neighbor_sample_cpu.cpp +++ b/csrc/cpu/neighbor_sample_cpu.cpp @@ -325,11 +325,11 @@ hetero_sample(const vector &node_types, } else { // Sample without replacement: unordered_set rnd_indices; - for (int64_t j = col_count - num_samples; j < col_count; j++) { + for (int64_t j = col_count - num_samples + 1; j < col_count + 1; j++) { int64_t rnd = uniform_randint(j); if (!rnd_indices.insert(rnd).second) { - rnd = j; - rnd_indices.insert(j); + rnd = j - 1; + rnd_indices.insert(rnd); } const int64_t offset = col_start + rnd; const int64_t &v = row_data[offset];