Description
Describe the bug
Searches on integer values larger than 2^31 - 1 (the Elasticsearch 32-bit integer limit) yield invalid results: the query returns items whose value for the queried attribute does not match.
However, the search behaves correctly if you change the dynamic mapping here to:
{"numerics": {"match_mapping_type": "long", "mapping": {"type": "double"}}}
The current code suggests numerics are cast from long to float, but a long (up to 2^63 - 1) is not a float and may be a larger integer than a float's capacity (2^31 - 1).
I'm curious whether there was an intentional decision to use long -> float, as it appears to lead to unwanted behavior. I have had success searching with the mapping configured as long -> double. There may be storage-space impacts from using 64-bit rather than 32-bit attributes.
To Reproduce
Execute the test below in test_api:
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "value, expected",
    [
        (2147483647, 1),  # Int Limit
        (2147483647 + 5000, 1),  # Above int Limit
        (32767, 1),  # Short Limit,
        # All below fail, return three values
        (21474836470, 1),  # Above int Limit
        (9223372036854775807, 1),  # Long Limit
    ],
)
async def test_big_int_eo_search(app_client, txn_client, ctx, value, expected):
    """Check that an equality search on a large integer property is exact.

    Three items are created in a fresh collection. Their
    ``eo:full_width_half_max`` property is set to ``value``, ``value`` plus a
    random offset in [10, 1010], and ``value`` minus a random offset in
    [10, 1010]. A POST to ``/search`` then filters on
    ``properties.eo:full_width_half_max = value``, and the number of distinct
    property values in the returned features must equal ``expected``.

    Args:
        app_client: Client used to POST the search request.
        txn_client: Client passed to ``create_collection`` / ``create_item``.
        ctx: Fixture exposing template ``item`` and ``collection`` objects.
        value: Integer stored on the first item and used as the search operand.
        expected: Expected number of distinct property values in the response.
    """
    random_str = "".join(
        random.choice("abcdef") for _ in range(random.randint(1, 5))
    )
    collection_id = f"test-collection-eo-{random_str}"

    test_item = deepcopy(ctx.item)
    test_item["collection"] = collection_id

    # Copy the collection as well: assigning into ``ctx.collection`` directly
    # would mutate the fixture object shared with the rest of the test.
    test_collection = deepcopy(ctx.collection)
    test_collection["id"] = collection_id

    # type number
    attr = "eo:full_width_half_max"

    stac_extensions = [
        "https://stac-extensions.github.io/eo/v2.0.0/schema.json",
    ]
    test_collection["stac_extensions"] = stac_extensions
    test_item["stac_extensions"] = stac_extensions

    await create_collection(txn_client, test_collection)

    # One exact match plus two near misses; only the exact one should be
    # returned by the equality filter below.
    candidate_values = [
        value,
        value + random.randint(10, 1010),
        value - random.randint(10, 1010),
    ]
    for val in candidate_values:
        item = deepcopy(test_item)
        item["id"] = str(uuid.uuid4())
        item["properties"][attr] = val
        await create_item(txn_client, item)

    params = {
        "collections": [collection_id],
        "filter": {
            "args": [
                {
                    "args": [
                        {"property": f"properties.{attr}"},
                        value,
                    ],
                    "op": "=",
                }
            ],
            "op": "and",
        },
    }
    resp = await app_client.post("/search", json=params)
    resp_json = resp.json()

    # Collapse to distinct property values so the assertion counts how many
    # different stored values matched, not how many features came back.
    results = {x["properties"][attr] for x in resp_json["features"]}
    assert len(results) == expected
Expected behavior
Return only the valid result
Additional context
Add any other context about the problem here.
I am currently unable to run the test with the mapping recast as double in this repo, but I have tested this configuration on a fork.