diff --git a/docs/sphinx/source/reference/iotools.rst b/docs/sphinx/source/reference/iotools.rst
index 481f46ddb5..d6dd31ac4d 100644
--- a/docs/sphinx/source/reference/iotools.rst
+++ b/docs/sphinx/source/reference/iotools.rst
@@ -17,6 +17,7 @@ of sources and file formats relevant to solar energy modeling.
    iotools.parse_epw
    iotools.read_srml
    iotools.read_srml_month_from_solardat
+   iotools.get_srml
    iotools.read_surfrad
    iotools.read_midc
    iotools.read_midc_raw_data_from_nrel
diff --git a/docs/sphinx/source/whatsnew/v0.10.0.rst b/docs/sphinx/source/whatsnew/v0.10.0.rst
index ab58828653..86a6315d25 100644
--- a/docs/sphinx/source/whatsnew/v0.10.0.rst
+++ b/docs/sphinx/source/whatsnew/v0.10.0.rst
@@ -36,6 +36,10 @@ Enhancements
 * Added `map_variables` parameter to :py:func:`pvlib.iotools.read_srml`
   and :py:func:`pvlib.iotools.read_srml_month_from_solardat`
   (:pull:`1773`)
+* Added :py:func:`pvlib.iotools.get_srml` that is similar to
+  :py:func:`pvlib.iotools.read_srml_month_from_solardat` but is able to
+  fetch multiple months of data using the `start` and `end` parameters.
+  (:pull:`1779`)
 * Allow passing keyword arguments to :py:func:`scipy:scipy.optimize.brentq` and
   :py:func:`scipy:scipy.optimize.newton` solvers in
   :py:func:`~pvlib.singlediode.bishop88_mpp`,
diff --git a/docs/sphinx/source/whatsnew/v0.9.6.rst b/docs/sphinx/source/whatsnew/v0.9.6.rst
index dc8e8d0701..943d31715c 100644
--- a/docs/sphinx/source/whatsnew/v0.9.6.rst
+++ b/docs/sphinx/source/whatsnew/v0.9.6.rst
@@ -44,11 +44,13 @@ Deprecations
   (data period 2003-2012). Instead, ECMWF recommends to use CAMS global
   reanalysis (EAC4) from the Atmosphere Data Store (ADS). See also
   :py:func:`pvlib.iotools.get_cams`. (:issue:`1691`, :pull:`1654`)
-
 * The ``recolumn`` parameter in :py:func:`pvlib.iotools.read_tmy3`, which
   maps TMY3 column names to nonstandard alternatives, is now deprecated.
   We encourage using ``map_variables`` (which produces standard pvlib names) instead.
   (:issue:`1517`, :pull:`1623`)
+* :py:func:`pvlib.iotools.read_srml_month_from_solardat` is deprecated and replaced by
+  :py:func:`pvlib.iotools.get_srml`. (:pull:`1779`)
+
 
 Enhancements
 ~~~~~~~~~~~~
diff --git a/pvlib/iotools/__init__.py b/pvlib/iotools/__init__.py
index 0a94e79f53..338f797673 100644
--- a/pvlib/iotools/__init__.py
+++ b/pvlib/iotools/__init__.py
@@ -2,6 +2,7 @@
 from pvlib.iotools.epw import read_epw, parse_epw  # noqa: F401
 from pvlib.iotools.srml import read_srml  # noqa: F401
 from pvlib.iotools.srml import read_srml_month_from_solardat  # noqa: F401
+from pvlib.iotools.srml import get_srml  # noqa: F401
 from pvlib.iotools.surfrad import read_surfrad  # noqa: F401
 from pvlib.iotools.midc import read_midc  # noqa: F401
 from pvlib.iotools.midc import read_midc_raw_data_from_nrel  # noqa: F401
diff --git a/pvlib/iotools/srml.py b/pvlib/iotools/srml.py
index 86b3e3aeb6..81b61556a0 100644
--- a/pvlib/iotools/srml.py
+++ b/pvlib/iotools/srml.py
@@ -3,7 +3,10 @@
 """
 import numpy as np
 import pandas as pd
+import urllib.error
+import warnings
 
+from pvlib._deprecation import deprecated
 
 # VARIABLE_MAP is a dictionary mapping SRML data element numbers to their
 # pvlib names. For most variables, only the first three digits are used,
@@ -26,8 +29,9 @@
 
 def read_srml(filename, map_variables=True):
     """
-    Read University of Oregon SRML 1min .tsv file into pandas dataframe. The
-    SRML is described in [1]_.
+    Read University of Oregon SRML 1min .tsv file into pandas dataframe.
+
+    The SRML is described in [1]_.
 
     Parameters
     ----------
@@ -51,14 +55,14 @@ def read_srml(filename, map_variables=True):
     the time of the row until the time of the next row. This is consistent
     with pandas' default labeling behavior.
 
-    See SRML's `Archival Files`_ page for more information.
-
-    .. _Archival Files: http://solardat.uoregon.edu/ArchivalFiles.html
+    See [2]_ for more information concerning the file format.
 
     References
     ----------
     .. [1] University of Oregon Solar Radiation Monitoring Laboratory
        `http://solardat.uoregon.edu/ <http://solardat.uoregon.edu/>`_
+    .. [2] `Archival (short interval) data files
+       <http://solardat.uoregon.edu/ArchivalFiles.html>`_
     """
     tsv_data = pd.read_csv(filename, delimiter='\t')
     data = _format_index(tsv_data)
@@ -168,10 +172,12 @@ def _format_index(df):
     return df
 
 
+@deprecated('0.10.0', alternative='pvlib.iotools.get_srml', removal='0.11.0')
 def read_srml_month_from_solardat(station, year, month, filetype='PO',
                                   map_variables=True):
-    """Request a month of SRML data from solardat and read it into
-    a Dataframe. The SRML is described in [1]_.
+    """Request a month of SRML data and read it into a Dataframe.
+
+    The SRML is described in [1]_.
 
     Parameters
     ----------
@@ -222,3 +228,111 @@ def read_srml_month_from_solardat(station, year, month, filetype='PO',
     url = "http://solardat.uoregon.edu/download/Archive/"
     data = read_srml(url + file_name, map_variables=map_variables)
     return data
+
+
+def get_srml(station, start, end, filetype='PO', map_variables=True,
+             url="http://solardat.uoregon.edu/download/Archive/"):
+    """Request data from UoO SRML and read it into a Dataframe.
+
+    The University of Oregon Solar Radiation Monitoring Laboratory (SRML) is
+    described in [1]_. A list of stations can be found in [2]_.
+
+    Data is returned for the entire months between and including start and end.
+
+    Parameters
+    ----------
+    station : str
+        Two letter station abbreviation.
+    start : datetime like
+        First day of the requested period
+    end : datetime like
+        Last day of the requested period
+    filetype : string, default: 'PO'
+        SRML file type to gather. See notes for explanation.
+    map_variables : bool, default: True
+        When true, renames columns of the DataFrame to pvlib variable names
+        where applicable. See variable :const:`VARIABLE_MAP`.
+    url : str, default: 'http://solardat.uoregon.edu/download/Archive/'
+        API endpoint URL
+
+    Returns
+    -------
+    data : pd.DataFrame
+        Dataframe with data from SRML.
+    meta : dict
+        Metadata with the keys 'filetype', 'station' and 'filenames'.
+
+    Raises
+    ------
+    ValueError
+        If none of the requested monthly files could be retrieved.
+
+    Warns
+    -----
+    UserWarning
+        For each requested monthly file that is not found on the server.
+
+    Notes
+    -----
+    File types designate the time interval of a file and if it contains
+    raw or processed data. For instance, `RO` designates raw, one minute
+    data and `PO` designates processed one minute data. The availability
+    of file types varies between sites. Below is a table of file types
+    and their time intervals. See [1]_ for site information.
+
+    ============= ============ ==================
+    time interval raw filetype processed filetype
+    ============= ============ ==================
+    1 minute      RO           PO
+    5 minute      RF           PF
+    15 minute     RQ           PQ
+    hourly        RH           PH
+    ============= ============ ==================
+
+    Warning
+    -------
+    SRML data has nighttime data prefilled with 0s through the end of the
+    current month (i.e., values are provided for data in the future).
+
+    References
+    ----------
+    .. [1] University of Oregon Solar Radiation Monitoring Laboratory
+       `http://solardat.uoregon.edu/ <http://solardat.uoregon.edu/>`_
+    .. [2] Station ID codes - Solar Radiation Monitoring Laboratory
+       `http://solardat.uoregon.edu/StationIDCodes.html
+       <http://solardat.uoregon.edu/StationIDCodes.html>`_
+    """
+    # Use pd.to_datetime so that strings (e.g. '2021-01-01') are accepted
+    start = pd.to_datetime(start)
+    end = pd.to_datetime(end)
+
+    # Generate one timestamp per requested month. Month-start ('MS') is used
+    # because the month-end alias 'M' is deprecated in recent pandas versions.
+    months = pd.date_range(
+        start.normalize().replace(day=1), end, freq='MS')
+    months_str = months.strftime('%y%m')
+
+    # Generate list of filenames
+    filenames = [f"{station}{filetype}{m}.txt" for m in months_str]
+
+    dfs = []  # Initialize list of monthly dataframes
+    for f in filenames:
+        try:
+            dfi = read_srml(url + f, map_variables=map_variables)
+            dfs.append(dfi)
+        except urllib.error.HTTPError:
+            warnings.warn(f"The following file was not found: {f}")
+
+    # pd.concat fails with an uninformative message on an empty list
+    if not dfs:
+        raise ValueError(
+            f"No files were found for station '{station}' and filetype "
+            f"'{filetype}' in the requested period.")
+
+    data = pd.concat(dfs, axis='rows')
+
+    meta = {'filetype': filetype,
+            'station': station,
+            'filenames': filenames}
+
+    return data, meta
diff --git a/pvlib/tests/iotools/test_srml.py b/pvlib/tests/iotools/test_srml.py
index 8bd8c94349..8f960885d4 100644
--- a/pvlib/tests/iotools/test_srml.py
+++ b/pvlib/tests/iotools/test_srml.py
@@ -3,7 +3,9 @@
 import pytest
 
 from pvlib.iotools import srml
-from ..conftest import DATA_DIR, RERUNS, RERUNS_DELAY
+from ..conftest import (DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal,
+                        assert_frame_equal, fail_on_pvlib_version)
+from pvlib._deprecation import pvlibDeprecationWarning
 
 srml_testfile = DATA_DIR / 'SRML-day-EUPO1801.txt'
 
@@ -74,19 +76,33 @@ def test__map_columns(column, expected):
     assert srml._map_columns(column) == expected
 
 
+@pytest.mark.remote_data
+@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
+def test_get_srml():
+    url = 'http://solardat.uoregon.edu/download/Archive/EUPO1801.txt'
+    file_data = srml.read_srml(url)
+    requested, _ = srml.get_srml(station='EU', start='2018-01-01',
+                                 end='2018-01-31')
+    assert_frame_equal(file_data, requested)
+
+
+@fail_on_pvlib_version('0.11')
 @pytest.mark.remote_data
 @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
 def test_read_srml_month_from_solardat():
     url = 'http://solardat.uoregon.edu/download/Archive/EUPO1801.txt'
     file_data = srml.read_srml(url)
-    requested = srml.read_srml_month_from_solardat('EU', 2018, 1)
+    with pytest.warns(pvlibDeprecationWarning, match='get_srml instead'):
+        requested = srml.read_srml_month_from_solardat('EU', 2018, 1)
     assert file_data.equals(requested)
 
 
+@fail_on_pvlib_version('0.11')
 @pytest.mark.remote_data
 @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
 def test_15_minute_dt_index():
-    data = srml.read_srml_month_from_solardat('TW', 2019, 4, 'RQ')
+    with pytest.warns(pvlibDeprecationWarning, match='get_srml instead'):
+        data = srml.read_srml_month_from_solardat('TW', 2019, 4, 'RQ')
     start = pd.Timestamp('20190401 00:00')
     start = start.tz_localize('Etc/GMT+8')
     end = pd.Timestamp('20190430 23:45')
@@ -96,10 +112,12 @@ def test_15_minute_dt_index():
     assert (data.index[3::4].minute == 45).all()
 
 
+@fail_on_pvlib_version('0.11')
 @pytest.mark.remote_data
 @pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
 def test_hourly_dt_index():
-    data = srml.read_srml_month_from_solardat('CD', 1986, 4, 'PH')
+    with pytest.warns(pvlibDeprecationWarning, match='get_srml instead'):
+        data = srml.read_srml_month_from_solardat('CD', 1986, 4, 'PH')
     start = pd.Timestamp('19860401 00:00')
     start = start.tz_localize('Etc/GMT+8')
     end = pd.Timestamp('19860430 23:00')
@@ -107,3 +125,40 @@ def test_hourly_dt_index():
     assert data.index[0] == start
     assert data.index[-1] == end
     assert (data.index.minute == 0).all()
+
+
+@pytest.mark.remote_data
+@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
+def test_get_srml_hourly():
+    data, meta = srml.get_srml(station='CD', start='1986-04-01',
+                               end='1986-05-31', filetype='PH')
+    expected_index = pd.date_range(start='1986-04-01', end='1986-05-31 23:59',
+                                   freq='1h', tz='Etc/GMT+8')
+    assert_index_equal(data.index, expected_index)
+
+
+@pytest.mark.remote_data
+@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
+def test_get_srml_minute():
+    data_read = srml.read_srml(srml_testfile)
+    data_get, meta = srml.get_srml(station='EU', start='2018-01-01',
+                                   end='2018-01-31', filetype='PO')
+    expected_index = pd.date_range(start='2018-01-01', end='2018-01-31 23:59',
+                                   freq='1min', tz='Etc/GMT+8')
+    assert_index_equal(data_get.index, expected_index)
+    assert all([c in data_get.columns for c in data_read.columns])
+    # Check that all indices in example file are present in remote file
+    assert data_read.index.isin(data_get.index).all()
+    assert meta['station'] == 'EU'
+    assert meta['filetype'] == 'PO'
+    assert meta['filenames'] == ['EUPO1801.txt']
+
+
+@pytest.mark.remote_data
+@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
+def test_get_srml_nonexisting_month_warning():
+    with pytest.warns(UserWarning, match='file was not found: EUPO0912.txt'):
+        # Request data for a period where not all files exist
+        # Eugene (EU) station started reporting 1-minute data in January 2010
+        data, meta = srml.get_srml(
+            station='EU', start='2009-12-01', end='2010-01-31', filetype='PO')