From 7640f599839c37f9089453c046d4dfdfb517176e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexandre=20Ren=C3=A9?= Date: Fri, 22 Mar 2024 14:03:40 +0100 Subject: [PATCH 1/3] Fix issue with NumPy chain and preallocation=0 Original logic in `grow_append` was to extend data by 10% of its length. This is a problem with the original data length is 0, since it then never extends. This commit amends `grow_append` to always extend by at least 10 elements. --- mcbackend/backends/numpy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mcbackend/backends/numpy.py b/mcbackend/backends/numpy.py index cb37f06..c122166 100644 --- a/mcbackend/backends/numpy.py +++ b/mcbackend/backends/numpy.py @@ -23,7 +23,7 @@ def grow_append( length = len(target) if length == draw_idx: # Grow the array by 10 % - ngrow = math.ceil(0.1 * length) + ngrow = max(10, math.ceil(0.1 * length)) if rigid[vn]: extension = numpy.empty((ngrow,) + numpy.shape(v)) else: @@ -72,7 +72,7 @@ def __init__(self, cmeta: ChainMeta, rmeta: RunMeta, *, preallocate: int) -> Non reserve = (preallocate, *var.shape) target_dict[var.name] = numpy.empty(reserve, var.dtype) else: - target_dict[var.name] = numpy.array([None] * preallocate) + target_dict[var.name] = numpy.array([None] * preallocate, dtype=object) super().__init__(cmeta, rmeta) From 389b10edff3561f25697fe5f243af80127829909 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexandre=20Ren=C3=A9?= Date: Sun, 9 Jun 2024 13:16:42 +0200 Subject: [PATCH 2/3] [NumPyBackend] Prevent ``preallocate=0`` from creating object arrays. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `grow_append` cannot know if ``preallocate = 0`` was used: it only looks at the `rigid` value to determine how to append. Because of this, will always fail when we use `preallocate = 0` with tensor variables, since then the shapes of `target` and `extension` don’t match. A simple fix is to simply deactivate the special behavior for `preallocate = 0`. - This commit extends `test_growing` with a `preallocate` parameter, so that we test both cases where it is 0 and positive. - We also fix `test_growing` to match the new behavior of `grow_append` introduced in #ea812b0, where data arrays always grow by at least 10. --- mcbackend/backends/numpy.py | 4 ++-- mcbackend/test_backend_numpy.py | 30 ++++++++++++++++++++---------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/mcbackend/backends/numpy.py b/mcbackend/backends/numpy.py index c122166..6245b4c 100644 --- a/mcbackend/backends/numpy.py +++ b/mcbackend/backends/numpy.py @@ -52,7 +52,7 @@ def __init__(self, cmeta: ChainMeta, rmeta: RunMeta, *, preallocate: int) -> Non and grow the allocated memory by 10 % when needed. Exceptions are variables with non-rigid shapes (indicated by 0 in the shape tuple) where the correct amount of memory cannot be pre-allocated. - In these cases, and when ``preallocate == 0`` object arrays are used. + In these cases object arrays are used. """ self._var_is_rigid: Dict[str, bool] = {} self._samples: Dict[str, numpy.ndarray] = {} @@ -68,7 +68,7 @@ def __init__(self, cmeta: ChainMeta, rmeta: RunMeta, *, preallocate: int) -> Non for var in variables: rigid = is_rigid(var.shape) and not var.undefined_ndim and var.dtype != "str" rigid_dict[var.name] = rigid - if preallocate > 0 and rigid: + if rigid: reserve = (preallocate, *var.shape) target_dict[var.name] = numpy.empty(reserve, var.dtype) else: diff --git a/mcbackend/test_backend_numpy.py b/mcbackend/test_backend_numpy.py index f3e49af..baaba84 100644 --- a/mcbackend/test_backend_numpy.py +++ b/mcbackend/test_backend_numpy.py @@ -2,6 +2,7 @@ import hagelkorn import numpy +import pytest from mcbackend.backends.numpy import NumPyBackend, NumPyChain, NumPyRun from mcbackend.core import RunMeta @@ -43,8 +44,9 @@ def test_targets(self): assert chain._samples["changeling"].dtype == object pass - def test_growing(self): - imb = NumPyBackend(preallocate=15) + @pytest.mark.parametrize("preallocate", [0, 75]) + def test_growing(self, preallocate): + imb = NumPyBackend(preallocate=preallocate) rm = RunMeta( rid=hagelkorn.random(), variables=[ @@ -62,19 +64,27 @@ def test_growing(self): ) run = imb.init_run(rm) chain = run.init_chain(0) - assert chain._samples["A"].shape == (15, 2) - assert chain._samples["B"].shape == (15,) - for _ in range(22): + assert chain._samples["A"].shape == (preallocate, 2) + assert chain._samples["B"].shape == (preallocate,) + for _ in range(130): draw = { "A": numpy.random.uniform(size=(2,)), "B": numpy.random.uniform(size=(random.randint(0, 10),)), } chain.append(draw) - # Growth: 15 → 17 → 19 → 21 → 24 - assert chain._samples["A"].shape == (24, 2) - assert chain._samples["B"].shape == (24,) - assert chain.get_draws("A").shape == (22, 2) - assert chain.get_draws("B").shape == (22,) + # NB: Growth algorithm adds max(10, ceil(0.1*length)) + if preallocate == 75: + # 75 → 85 → 95 → 105 → 116 → 128 → 141 + assert chain._samples["A"].shape == (141, 2) + assert chain._samples["B"].shape == (141,) + elif preallocate == 0: + # 10 → 20 → ... → 90 → 100 → 110 → 121 → 134 + assert chain._samples["A"].shape == (134, 2) + assert chain._samples["B"].shape == (134,) + else: + assert False, f"Missing test for {preallocate=}" + assert chain.get_draws("A").shape == (130, 2) + assert chain.get_draws("B").shape == (130,) pass From 5d1d02700ed715efbb832610cd023432b15be7a3 Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Sun, 16 Jun 2024 12:08:59 +0200 Subject: [PATCH 3/3] Remove trailing whitespace --- mcbackend/test_backend_numpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mcbackend/test_backend_numpy.py b/mcbackend/test_backend_numpy.py index baaba84..86850a3 100644 --- a/mcbackend/test_backend_numpy.py +++ b/mcbackend/test_backend_numpy.py @@ -72,7 +72,7 @@ def test_growing(self, preallocate): "B": numpy.random.uniform(size=(random.randint(0, 10),)), } chain.append(draw) - # NB: Growth algorithm adds max(10, ceil(0.1*length)) + # NB: Growth algorithm adds max(10, ceil(0.1*length)) if preallocate == 75: # 75 → 85 → 95 → 105 → 116 → 128 → 141 assert chain._samples["A"].shape == (141, 2)