-
Notifications
You must be signed in to change notification settings - Fork 228
Implement gmt xarray BackendEntrypoint #3919
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 15 commits
b81aa06
2b98a65
f41a812
b71dcf5
60c6726
bf09a39
e622878
b9e3eb4
3a109ac
d764f78
98910f8
126560e
436f0fc
328b6ab
06aa39d
c79e50a
c0ccd4b
c5ebf9f
17fd9d7
1c8af3a
1d9f21a
ca9132f
9669bcf
f722b47
a952df9
0c6b8a3
3a256a6
64c50c9
e6ee6fa
5b90a8a
31d1999
6a4e1a4
91df0ee
c97bd82
2e5fb6b
b136e26
19c5252
3bb1f44
8984c1c
f2446fc
52c0b7e
143a375
acab8b2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -154,7 +154,7 @@ def load_static_earth_relief(): | |
A grid of Earth relief for internal tests. | ||
""" | ||
fname = which("@static_earth_relief.nc", download="c") | ||
return load_dataarray(fname) | ||
return load_dataarray(fname, decode_kind="grid") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Again, with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Started draft PR at #3922, will cherrypick things across. |
||
|
||
|
||
def skip_if_no(package): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,10 +2,14 @@ | |
PyGMT input/output (I/O) utilities. | ||
""" | ||
|
||
from typing import Literal | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we're going to deprecate There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we want to change the default engine in load_dataarray to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Changing the default engine to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
||
import xarray as xr | ||
|
||
|
||
def load_dataarray(filename_or_obj, **kwargs): | ||
def load_dataarray( | ||
filename_or_obj, *, engine="gmt", decode_kind: Literal["grid", "image"], **kwargs | ||
): | ||
""" | ||
Open, load into memory, and close a DataArray from a file or file-like object | ||
containing a single data variable. | ||
|
@@ -32,6 +36,10 @@ def load_dataarray(filename_or_obj, **kwargs): | |
------- | ||
datarray : xarray.DataArray | ||
The newly created DataArray. | ||
engine : str | ||
Engine to use when reading files. Default engine is 'gmt'. | ||
decode_kind | ||
The kind of data to read. Valid values are ``"grid"`` or ``"image"``. | ||
|
||
See Also | ||
-------- | ||
|
@@ -41,7 +49,9 @@ def load_dataarray(filename_or_obj, **kwargs): | |
msg = "'cache' has no effect in this context." | ||
raise TypeError(msg) | ||
|
||
with xr.open_dataarray(filename_or_obj, **kwargs) as dataarray: | ||
with xr.open_dataarray( | ||
filename_or_obj, engine=engine, decode_kind=decode_kind, **kwargs | ||
) as dataarray: | ||
result = dataarray.load() | ||
_ = result.gmt # load GMTDataArray accessor information | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
""" | ||
Tests for xarray 'gmt' backend engine. | ||
""" | ||
|
||
import re | ||
|
||
import pytest | ||
import xarray as xr | ||
from pygmt.enums import GridRegistration, GridType | ||
from pygmt.exceptions import GMTInvalidInput | ||
|
||
|
||
def test_xarray_backend_gmt_read_grid(): | ||
""" | ||
Ensure that passing engine='gmt' to xarray.open_dataarray works for reading | ||
NetCDF grids. | ||
""" | ||
with xr.open_dataarray( | ||
weiji14 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
filename_or_obj="@static_earth_relief.nc", engine="gmt", decode_kind="grid" | ||
) as da: | ||
weiji14 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
assert da.sizes == {"lat": 14, "lon": 8} | ||
assert da.dtype == "float32" | ||
assert da.gmt.registration == GridRegistration.PIXEL | ||
assert da.gmt.gtype == GridType.GEOGRAPHIC | ||
|
||
|
||
def test_xarray_backend_gmt_read_image(): | ||
""" | ||
Ensure that passing engine='gmt' to xarray.open_dataarray works for reading | ||
GeoTIFF images. | ||
""" | ||
with xr.open_dataarray( | ||
filename_or_obj="@earth_day_01d", engine="gmt", decode_kind="image" | ||
) as da: | ||
weiji14 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
assert da.sizes == {"band": 3, "y": 180, "x": 360} | ||
assert da.dtype == "uint8" | ||
assert da.gmt.registration == GridRegistration.PIXEL | ||
assert da.gmt.gtype == GridType.GEOGRAPHIC | ||
|
||
|
||
def test_xarray_backend_gmt_read_invalid_kind(): | ||
""" | ||
Check that xarray.open_dataarray(..., engine="gmt") fails with missing or incorrect | ||
'decode_kind'. | ||
""" | ||
with pytest.raises( | ||
TypeError, | ||
match=re.escape( | ||
"GMTBackendEntrypoint.open_dataset() missing 1 required keyword-only argument: 'decode_kind'" | ||
), | ||
): | ||
xr.open_dataarray("nokind.nc", engine="gmt") | ||
|
||
with pytest.raises(GMTInvalidInput): | ||
xr.open_dataarray( | ||
filename_or_obj="invalid.tif", engine="gmt", decode_kind="invalid" | ||
) |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Considering moving this file to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,72 @@ | ||||||
""" | ||||||
An xarray backend for reading raster grid/image files using the 'gmt' engine. | ||||||
""" | ||||||
|
||||||
import os | ||||||
from pathlib import Path | ||||||
from typing import Literal | ||||||
|
||||||
import xarray as xr | ||||||
from pygmt.clib import Session | ||||||
from pygmt.exceptions import GMTInvalidInput | ||||||
from pygmt.helpers import build_arg_list | ||||||
from pygmt.src.which import which | ||||||
from xarray.backends import BackendEntrypoint | ||||||
|
||||||
|
||||||
class GMTBackendEntrypoint(BackendEntrypoint): | ||||||
""" | ||||||
Xarray backend to read raster grid/image files using 'gmt' engine. | ||||||
|
||||||
Relies on the libgdal-netcdf driver used by GMT C for NetCDF files, and libgdal for | ||||||
GeoTIFF files. | ||||||
seisman marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
""" | ||||||
|
||||||
description = "Open raster (.grd, .nc or .tif) files in Xarray via GMT read." | ||||||
weiji14 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
open_dataset_parameters = ("filename_or_obj", "kind") | ||||||
weiji14 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
url = "https://github.com/GenericMappingTools/pygmt" | ||||||
|
||||||
def open_dataset( # type: ignore[override] | ||||||
self, | ||||||
filename_or_obj: str | os.PathLike, | ||||||
weiji14 marked this conversation as resolved.
Show resolved
Hide resolved
weiji14 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
*, | ||||||
drop_variables=None, # noqa: ARG002 | ||||||
decode_kind: Literal["grid", "image"], | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why choose the name There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, it does sound like |
||||||
# other backend specific keyword arguments | ||||||
# `chunks` and `cache` DO NOT go here, they are handled by xarray | ||||||
) -> xr.Dataset: | ||||||
""" | ||||||
Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With the fix in #3927, sphinx-autogen will generate a stub file for this method. The method documentation is not ideal, at least one "Parameters" section is needed. |
||||||
""" | ||||||
if decode_kind not in {"grid", "image"}: | ||||||
msg = f"Invalid raster kind: '{decode_kind}'. Valid values are 'grid' or 'image'." | ||||||
raise GMTInvalidInput(msg) | ||||||
|
||||||
with Session() as lib: | ||||||
with lib.virtualfile_out(kind=decode_kind) as voutfile: | ||||||
kwdict = {"T": {"grid": "g", "image": "i"}[decode_kind]} | ||||||
lib.call_module( | ||||||
module="read", | ||||||
args=[filename_or_obj, voutfile, *build_arg_list(kwdict)], | ||||||
) | ||||||
|
||||||
raster: xr.DataArray = lib.virtualfile_to_raster( | ||||||
vfname=voutfile, kind=decode_kind | ||||||
) | ||||||
# Add "source" encoding | ||||||
source = which(fname=filename_or_obj) | ||||||
raster.encoding["source"] = ( | ||||||
source[0] if isinstance(source, list) else source | ||||||
) | ||||||
_ = raster.gmt # Load GMTDataArray accessor information | ||||||
return raster.to_dataset() | ||||||
|
||||||
def guess_can_open(self, filename_or_obj) -> bool: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we should add tests to ensure the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I was expecting that the following code should work:
but I got the following error:
It's likely that the built-in NetCDF4 backend has a higher priority than other 3rd-party engines, so the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes, according to the docs at https://github.com/pydata/xarray/blob/ee862fe12163b728a638216195217aa935238dc3/xarray/backends/api.py#L750-L752 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess it means we can remove the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about if we remove There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The netCDF engine reads the file's magic number to determine its format. This allows it to recognize netCDF files regardless of their file extension. The "rasterio" engine relies on file extensions and supports extensions like As I understand it, the actual engine to use is determined by the order of the available engines. I have the following engines locally: >>> xr.backends.list_engines()
{'netcdf4': <NetCDF4BackendEntrypoint>
Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using netCDF4 in Xarray
Learn more at https://docs.xarray.dev/en/stable/generated/xarray.backends.NetCDF4BackendEntrypoint.html,
'scipy': <ScipyBackendEntrypoint>
Open netCDF files (.nc, .nc4, .cdf and .gz) using scipy in Xarray
Learn more at https://docs.xarray.dev/en/stable/generated/xarray.backends.ScipyBackendEntrypoint.html,
'gmt': <GMTBackendEntrypoint>
Open raster (.grd, .nc or .tif) files in Xarray via GMT.
Learn more at https://github.com/GenericMappingTools/pygmt,
'rasterio': <RasterioBackend>,
'store': <StoreBackendEntrypoint>
Open AbstractDataStore instances in Xarray
Learn more at https://docs.xarray.dev/en/stable/generated/xarray.backends.StoreBackendEntrypoint.html} So, when reading netCDF files, the default engine is always
For the list of engines, standard engines like "netcdf4", "h5netcdf", and "scipy" are prioritized first. Any additional engines (like "gmt" and "rasterio") are then sorted alphabetically. This explains why "gmt" comes before "rasterio" and ends up being selected as the default for .tif files. That raises the question: can we really trust the engine order? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, the engine order doesn't look very deterministic, with ones like engine = GMTBackendEntrypoint(decode_kind="grid")
ds = xr.open_dataset("some_file.nc", engine=engine) But that PR doesn't look like it will be merged anytime soon, so we still need to pass There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yes, I feel it's always good to explicitly specify the engine. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The (I had an idea that we can try detecting if the file starts with |
||||||
""" | ||||||
Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`. | ||||||
""" | ||||||
try: | ||||||
ext = Path(filename_or_obj).suffix | ||||||
except TypeError: | ||||||
return False | ||||||
return ext in {".grd", ".nc", ".tif"} | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, added |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm wondering if it makes more sense to have a separate category like "Xarray Integration" and put both
GMTBackendEntrypoint
andGMTDataArrayAccessor
in the category. Also, if #3854 is implemented,GMTDataArrayAccessor
no longer makes sense in the "Metadata" category.Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I was wondering about this too. I can put
GMTBackendEntrypoint
in a new "Xarray Integration" section as you suggested, and then we'll remove the I/O section in a separate PR.Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Move
GMTDataArrayAccessor
to the "Xarray Integration" section in this PR?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think do that in #3854. Hopefully this PR can be done before that.