Skip to content

Standardize pvgis_hourly to return (data,meta) #2462

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion docs/sphinx/source/whatsnew/v0.12.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@ v0.12.1 (XXXX, 2025)

Breaking Changes
~~~~~~~~~~~~~~~~

* The functions :py:func:`~pvlib.iotools.read_pvgis_hourly` and
:py:func:`~pvlib.iotools.get_pvgis_hourly` now return ``(data, meta)``
following the iotools convention instead of ``(data, inputs, meta)``.
The ``inputs`` dictionary is now included in ``meta`` under the
``'inputs'`` key; the structure of ``meta`` has changed to accommodate
it. (:pull:`2462`)

Deprecations
~~~~~~~~~~~~
Expand Down
48 changes: 31 additions & 17 deletions pvlib/iotools/pvgis.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ def get_pvgis_hourly(latitude, longitude, start=None, end=None,

PVGIS data is freely available at [1]_.

.. versionchanged:: 0.13.0
The function now returns two items ``(data, meta)``. Previous
versions of this function returned three elements
``(data, inputs, meta)``. The ``inputs`` dictionary is now included in
``meta`` under the ``'inputs'`` key; the structure of ``meta`` has
changed to accommodate it.

Parameters
----------
latitude: float
Expand Down Expand Up @@ -130,8 +136,6 @@ def get_pvgis_hourly(latitude, longitude, start=None, end=None,
-------
data : pandas.DataFrame
Time-series of hourly data, see Notes for fields
inputs : dict
Dictionary of the request input parameters
metadata : dict
Dictionary containing metadata, including the request input
parameters under the ``'inputs'`` key

Expand Down Expand Up @@ -189,7 +193,7 @@ def get_pvgis_hourly(latitude, longitude, start=None, end=None,
Examples
--------
>>> # Retrieve two years of irradiance data from PVGIS:
>>> data, meta, inputs = pvlib.iotools.get_pvgis_hourly( # doctest: +SKIP
>>> data, meta = pvlib.iotools.get_pvgis_hourly( # doctest: +SKIP
>>> latitude=45, longitude=8, start=2015, end=2016) # doctest: +SKIP

References
Expand Down Expand Up @@ -241,28 +245,33 @@ def get_pvgis_hourly(latitude, longitude, start=None, end=None,


def _parse_pvgis_hourly_json(src, map_variables):
inputs = src['inputs']
metadata = src['meta']
metadata = src['meta'].copy()
# Override the "inputs" in metadata
metadata['inputs'] = src['inputs']
# Keep the original meta['inputs'] (the input descriptions) one layer down
metadata['inputs']['descriptions'] = src['meta']['inputs']
data = pd.DataFrame(src['outputs']['hourly'])
data.index = pd.to_datetime(data['time'], format='%Y%m%d:%H%M', utc=True)
data = data.drop('time', axis=1)
data = data.astype(dtype={'Int': 'int'}) # The 'Int' column to be integer
if map_variables:
data = data.rename(columns=VARIABLE_MAP)
return data, inputs, metadata
return data, metadata


def _parse_pvgis_hourly_csv(src, map_variables):
# The first 4 rows are latitude, longitude, elevation, radiation database
inputs = {}
metadata = {'inputs': {}}
# 'location' metadata
# 'Latitude (decimal degrees): 45.000\r\n'
inputs['latitude'] = float(src.readline().split(':')[1])
metadata['inputs']['latitude'] = float(src.readline().split(':')[1])
# 'Longitude (decimal degrees): 8.000\r\n'
inputs['longitude'] = float(src.readline().split(':')[1])
metadata['inputs']['longitude'] = float(src.readline().split(':')[1])
# Elevation (m): 1389.0\r\n
inputs['elevation'] = float(src.readline().split(':')[1])
metadata['inputs']['elevation'] = float(src.readline().split(':')[1])
# 'Radiation database: \tPVGIS-SARAH\r\n'
inputs['radiation_database'] = src.readline().split(':')[1].strip()
metadata['inputs']['radiation_database'] = \
src.readline().split(':')[1].strip()
# Parse through the remaining metadata section (the number of lines for
# this section depends on the requested parameters)
while True:
Expand All @@ -273,7 +282,7 @@ def _parse_pvgis_hourly_csv(src, map_variables):
break
# Only retrieve metadata from non-empty lines
elif line.strip() != '':
inputs[line.split(':')[0]] = line.split(':')[1].strip()
metadata['inputs'][line.split(':')[0]] = line.split(':')[1].strip()
elif line == '': # If end of file is reached
raise ValueError('No data section was detected. File has probably '
'been modified since being downloaded from PVGIS')
Expand All @@ -295,16 +304,23 @@ def _parse_pvgis_hourly_csv(src, map_variables):
# integer. It is necessary to convert to float, before converting to int
data = data.astype(float).astype(dtype={'Int': 'int'})
# Generate metadata dictionary containing description of parameters
metadata = {}
metadata['descriptions'] = {}
for line in src.readlines():
if ':' in line:
metadata[line.split(':')[0]] = line.split(':')[1].strip()
return data, inputs, metadata
metadata['descriptions'][line.split(':')[0]] = \
line.split(':')[1].strip()
return data, metadata


def read_pvgis_hourly(filename, pvgis_format=None, map_variables=True):
"""Read a PVGIS hourly file.

.. versionchanged:: 0.13.0
The function now returns two items ``(data, meta)``. Previous
versions of this function returned three elements
``(data, inputs, meta)``. The ``inputs`` dictionary is now included in
``meta`` under the ``'inputs'`` key; the structure of ``meta`` has
changed to accommodate it.

Parameters
----------
filename : str, pathlib.Path, or file-like buffer
Expand All @@ -323,8 +339,6 @@ def read_pvgis_hourly(filename, pvgis_format=None, map_variables=True):
-------
data : pandas.DataFrame
the time series data
inputs : dict
the inputs
metadata : dict
metadata, including the inputs under the ``'inputs'`` key

Expand Down
27 changes: 18 additions & 9 deletions tests/iotools/test_pvgis.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,16 @@
'WS10m': {'description': '10-m total wind speed', 'units': 'm/s'}, # noqa: E501
'Int': {'description': '1 means solar radiation values are reconstructed'}}}}} # noqa: E501

# Reformat the metadata as implemented in #2462
descriptions_csv = metadata_radiation_csv.copy()
metadata_radiation_csv = {}
metadata_radiation_csv['descriptions'] = descriptions_csv
metadata_radiation_csv['inputs'] = inputs_radiation_csv

descriptions_json = metadata_pv_json['inputs']
metadata_pv_json['inputs'] = inputs_pv_json
metadata_pv_json['inputs']['descriptions'] = descriptions_json


def generate_expected_dataframe(values, columns, index):
"""Create dataframe from arrays of values, columns and index, in order to
Expand Down Expand Up @@ -175,25 +185,24 @@ def expected_pv_json_mapped():
# Test read_pvgis_hourly function using two different files with different
# input arguments (to test variable mapping and pvgis_format)
# pytest request.getfixturevalue is used to simplify the input arguments
@pytest.mark.parametrize('testfile,expected_name,metadata_exp,inputs_exp,map_variables,pvgis_format', [ # noqa: E501
@pytest.mark.parametrize('testfile,expected_name,metadata_exp,map_variables,pvgis_format', [ # noqa: E501
(testfile_radiation_csv, 'expected_radiation_csv', metadata_radiation_csv,
inputs_radiation_csv, False, None),
False, None),
(testfile_radiation_csv, 'expected_radiation_csv_mapped',
metadata_radiation_csv, inputs_radiation_csv, True, 'csv'),
(testfile_pv_json, 'expected_pv_json', metadata_pv_json, inputs_pv_json,
metadata_radiation_csv, True, 'csv'),
(testfile_pv_json, 'expected_pv_json', metadata_pv_json,
False, None),
(testfile_pv_json, 'expected_pv_json_mapped', metadata_pv_json,
inputs_pv_json, True, 'json')])
True, 'json')])
def test_read_pvgis_hourly(testfile, expected_name, metadata_exp,
inputs_exp, map_variables, pvgis_format, request):
map_variables, pvgis_format, request):
# Get expected dataframe from fixture
expected = request.getfixturevalue(expected_name)
# Read data from file
out, inputs, metadata = read_pvgis_hourly(
out, metadata = read_pvgis_hourly(
testfile, map_variables=map_variables, pvgis_format=pvgis_format)
# Assert whether dataframe and metadata (incl. inputs) are as expected
assert_frame_equal(out, expected)
assert inputs == inputs_exp
assert metadata == metadata_exp


Expand Down Expand Up @@ -248,7 +257,7 @@ def test_get_pvgis_hourly(requests_mock, testfile, expected_name, args,
# inputs are passing on correctly
requests_mock.get(url_test, text=mock_response)
# Make API call - an error is raised if requested URI does not match
out, inputs, metadata = get_pvgis_hourly(
out, metadata = get_pvgis_hourly(
latitude=45, longitude=8, map_variables=map_variables, **args)
# Get expected dataframe from fixture
expected = request.getfixturevalue(expected_name)
Expand Down
Loading