add SOLRAD data parser to iotools (#667)

wholmgren · cwhanse · commit af355bd36b2c · 2019-02-28T18:44:34.000-07:00
* not working solrad

* working solrad parser

* add tests, dtypes, madison

* add api doc

* manually set nan values

* use pardir. add data_dir to conftest

* unused input. silence stickler E226 (whitespace)
diff --git a/.stickler.yml b/.stickler.yml
@@ -2,7 +2,7 @@ linters:
     flake8:
         python: 3
         max-line-length: 79
-        ignore: E201,E241
+        ignore: E201,E241,E226
 files:
     ignore:
         - 'pvlib/_version.py'
diff --git a/docs/sphinx/source/api.rst b/docs/sphinx/source/api.rst
@@ -338,6 +338,7 @@ relevant to solar energy modeling.
    iotools.read_ecmwf_macc
    iotools.get_ecmwf_macc
    iotools.read_crn
+   iotools.read_solrad
 
 A :py:class:`~pvlib.location.Location` object may be created from metadata
 in some files.
diff --git a/docs/sphinx/source/whatsnew/v0.6.2.rst b/docs/sphinx/source/whatsnew/v0.6.2.rst
@@ -19,6 +19,7 @@ API Changes
 Enhancements
 ~~~~~~~~~~~~
 * Add US CRN data reader to `pvlib.iotools`.
+* Add SOLRAD data reader to `pvlib.iotools`.
 
 Bug fixes
 ~~~~~~~~~
diff --git a/pvlib/data/abq19056.dat b/pvlib/data/abq19056.dat
@@ -0,0 +1,6 @@
+ Albuquerque
+   35.03796 -106.62211 1617 -7  version 1
+ 2019  56  2 25  0  0  0.000  79.30   104.5 0    60.5 0    97.8 0     5.9 0    43.6 0     0.382     2.280     0.431     0.066
+ 2019  56  2 25  0  1  0.017  79.49   102.6 0    59.7 0    96.2 0     5.7 0    43.6 0     0.764     1.800     0.431     0.063
+ 2019  56  2 25  0  2  0.033  79.68   102.1 0    65.8 0    94.8 0     5.5 0    43.6 0     0.382     4.079     0.323     0.062
+ 2019  56  2 25  0  3  0.050  79.87   102.6 0    76.3 0 -9999.9 0     5.3 0    43.6 0     0.509     1.920     0.215     0.059
diff --git a/pvlib/data/msn19056.dat b/pvlib/data/msn19056.dat
@@ -0,0 +1,6 @@
+ Madison
+   43.07250 -89.41133 271 -6 version 1
+ 2019  56  2 25  0  0  0.000  94.28    -2.3 0     0.0 0     0.4 0 -9999.9 1 -9999.9 1   187.2 0   265.6 0   265.3 0     0.000     0.000     0.000 -9999.900     0.002    26.000    27.000
+ 2019  56  2 25  0  1  0.017  94.46    -2.3 0     0.0 0     0.1 0 -9999.9 1 -9999.9 1   188.2 0   265.6 0   265.3 0     0.133     0.128     0.223 -9999.900     0.001    26.000    72.000
+ 2019  56  2 25  0  2  0.033  94.64    -2.7 0    -0.2 0     0.0 0 -9999.9 1 -9999.9 1   187.6 0   265.6 0   265.3 0     0.000     0.257     0.000 -9999.900     0.001    24.000    42.000
+ 2019  56  2 25  0  3  0.050  94.82    -2.5 0     0.4 0     0.0 0 -9999.9 1 -9999.9 1   187.3 0   265.6 0   265.3 0     0.266     0.385     0.000 -9999.900     0.001    26.000    48.000
diff --git a/pvlib/iotools/__init__.py b/pvlib/iotools/__init__.py
@@ -8,3 +8,4 @@
 from pvlib.iotools.ecmwf_macc import read_ecmwf_macc  # noqa: F401
 from pvlib.iotools.ecmwf_macc import get_ecmwf_macc  # noqa: F401
 from pvlib.iotools.crn import read_crn  # noqa: F401
+from pvlib.iotools.solrad import read_solrad  # noqa: F401
diff --git a/pvlib/iotools/solrad.py b/pvlib/iotools/solrad.py
@@ -0,0 +1,122 @@
+"""Functions to read data from the NOAA SOLRAD network.
+"""
+
+import numpy as np
+import pandas as pd
+
+# pvlib conventions
+BASE_HEADERS = (
+    'year', 'julian_day', 'month', 'day', 'hour', 'minute', 'decimal_time',
+    'solar_zenith', 'ghi', 'ghi_flag', 'dni', 'dni_flag', 'dhi', 'dhi_flag',
+    'uvb', 'uvb_flag', 'uvb_temp', 'uvb_temp_flag'
+)
+
+# following README_SOLRAD.txt variable names for remaining
+STD_HEADERS = ('std_dw_psp', 'std_direct', 'std_diffuse', 'std_uvb')
+
+HEADERS = BASE_HEADERS + STD_HEADERS
+
+DPIR_HEADERS = ('dpir', 'dpir_flag', 'dpirc', 'dpirc_flag', 'dpird',
+                'dpird_flag')
+
+MADISON_HEADERS = BASE_HEADERS + DPIR_HEADERS + STD_HEADERS + (
+    'std_dpir', 'std_dpirc', 'std_dpird')
+
+
+# as specified in README_SOLRAD.txt file. excludes 1 space between columns
+WIDTHS = [4, 3] + 4*[2] + [6, 6] + 5*[7, 1] + 4*[9]
+MADISON_WIDTHS = [4, 3] + 4*[2] + [6, 6] + 8*[7, 1] + 7*[9]
+# add 1 to make fields contiguous (required by pandas.read_fwf)
+WIDTHS = [w + 1 for w in WIDTHS]
+MADISON_WIDTHS = [w + 1 for w in MADISON_WIDTHS]
+# no space after last column. FIXME: truncates last digit (0.066 -> 0.06)
+WIDTHS[-1] -= 1
+MADISON_WIDTHS[-1] -= 1
+
+DTYPES = [
+    'int64', 'int64', 'int64', 'int64', 'int64', 'int64', 'float64',
+    'float64', 'float64', 'int64', 'float64', 'int64', 'float64', 'int64',
+    'float64', 'int64', 'float64', 'int64', 'float64', 'float64',
+    'float64', 'float64']
+
+MADISON_DTYPES = [
+    'int64', 'int64', 'int64', 'int64', 'int64', 'int64', 'float64', 'float64',
+    'float64', 'int64', 'float64', 'int64', 'float64', 'int64', 'float64',
+    'int64', 'float64', 'int64', 'float64', 'int64', 'float64', 'int64',
+    'float64', 'int64', 'float64', 'float64', 'float64', 'float64', 'float64',
+    'float64', 'float64']
+
+
+def read_solrad(filename):
+    """
+    Read NOAA SOLRAD [1]_ [2]_ fixed-width file into pandas dataframe.
+
+    Parameters
+    ----------
+    filename: str
+        filepath or url to read for the fixed-width file.
+
+    Returns
+    -------
+    data: DataFrame
+        A dataframe with DatetimeIndex and all of the variables in the
+        file.
+
+    Notes
+    -----
+    SOLRAD data resolution is described by the README_SOLRAD.txt:
+    "Before 1-jan. 2015 the data were reported as 3-min averages;
+    on and after 1-Jan. 2015, SOLRAD data are reported as 1-min.
+    averages of 1-sec. samples."
+    Here, missing data is flagged as NaN, rather than -9999.9.
+
+    References
+    ----------
+    .. [1] NOAA SOLRAD Network
+       `https://www.esrl.noaa.gov/gmd/grad/solrad/index.html
+       <https://www.esrl.noaa.gov/gmd/grad/solrad/index.html>`_
+
+    .. [2] B. B. Hicks et al., (1996), The NOAA Integrated Surface
+       Irradiance Study (ISIS). A New Surface Radiation Monitoring
+       Program. Bull. Amer. Meteor. Soc., 77, 2857-2864.
+       :doi:`10.1175/1520-0477(1996)077<2857:TNISIS>2.0.CO;2`
+    """
+    if 'msn' in filename:
+        names = MADISON_HEADERS
+        widths = MADISON_WIDTHS
+        dtypes = MADISON_DTYPES
+    else:
+        names = HEADERS
+        widths = WIDTHS
+        dtypes = DTYPES
+
+    # read in data
+    data = pd.read_fwf(filename, header=None, skiprows=2, names=names,
+                       widths=widths, na_values=-9999.9)
+
+    # loop here because dtype kwarg not supported in read_fwf until 0.20
+    for (col, _dtype) in zip(data.columns, dtypes):
+        ser = data[col].astype(_dtype)
+        if _dtype == 'float64':
+            # older versions of pandas/numpy read '-9999.9' as
+            # -9999.8999999999996 and fail to set nan in read_fwf,
+            # so manually set nan
+            ser = ser.where(ser > -9999, other=np.nan)
+        data[col] = ser
+
+    # set index
+    # columns do not have leading 0s, so must zfill(2) to comply
+    # with %m%d%H%M format
+    dts = data[['month', 'day', 'hour', 'minute']].astype(str).apply(
+        lambda x: x.str.zfill(2))
+    dtindex = pd.to_datetime(
+        data['year'].astype(str) + dts['month'] + dts['day'] + dts['hour'] +
+        dts['minute'], format='%Y%m%d%H%M', utc=True)
+    data = data.set_index(dtindex)
+    try:
+        # to_datetime(utc=True) does not work in older versions of pandas
+        data = data.tz_localize('UTC')
+    except TypeError:
+        pass
+
+    return data
diff --git a/pvlib/test/conftest.py b/pvlib/test/conftest.py
@@ -1,3 +1,5 @@
+import inspect
+import os
 import platform
 
 import numpy as np
@@ -33,6 +35,12 @@ def inner():
     return wrapper
 
 
+# commonly used directories in the tests
+test_dir = os.path.dirname(
+    os.path.abspath(inspect.getfile(inspect.currentframe())))
+data_dir = os.path.join(test_dir, os.pardir, 'data')
+
+
 has_python2 = parse_version(platform.python_version()) < parse_version('3')
 
 platform_is_windows = platform.system() == 'Windows'
diff --git a/pvlib/test/test_solrad.py b/pvlib/test/test_solrad.py
@@ -0,0 +1,104 @@
+import os
+
+import pandas as pd
+from pandas.util.testing import assert_frame_equal
+import numpy as np
+from numpy import nan
+
+import pytest
+
+from pvlib.iotools import solrad
+from conftest import data_dir
+
+
+testfile = os.path.join(data_dir, 'abq19056.dat')
+testfile_mad = os.path.join(data_dir, 'msn19056.dat')
+
+
+columns = [
+    'year', 'julian_day', 'month', 'day', 'hour', 'minute', 'decimal_time',
+    'solar_zenith', 'ghi', 'ghi_flag', 'dni', 'dni_flag', 'dhi', 'dhi_flag',
+    'uvb', 'uvb_flag', 'uvb_temp', 'uvb_temp_flag', 'std_dw_psp', 'std_direct',
+    'std_diffuse', 'std_uvb']
+index = pd.DatetimeIndex(['2019-02-25 00:00:00',
+                          '2019-02-25 00:01:00',
+                          '2019-02-25 00:02:00',
+                          '2019-02-25 00:03:00'],
+                         freq=None).tz_localize('UTC')
+values = np.array([
+    [2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00, 0.000e+00,
+        0.000e+00, 7.930e+01, 1.045e+02, 0.000e+00, 6.050e+01, 0.000e+00,
+        9.780e+01, 0.000e+00, 5.900e+00, 0.000e+00, 4.360e+01, 0.000e+00,
+        3.820e-01, 2.280e+00, 4.310e-01, 6.000e-02],
+    [2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00, 1.000e+00,
+        1.700e-02, 7.949e+01, 1.026e+02, 0.000e+00, 5.970e+01, 0.000e+00,
+        9.620e+01, 0.000e+00, 5.700e+00, 0.000e+00, 4.360e+01, 0.000e+00,
+        7.640e-01, 1.800e+00, 4.310e-01, 6.000e-02],
+    [2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00, 2.000e+00,
+        3.300e-02, 7.968e+01, 1.021e+02, 0.000e+00, 6.580e+01, 0.000e+00,
+        9.480e+01, 0.000e+00, 5.500e+00, 0.000e+00, 4.360e+01, 0.000e+00,
+        3.820e-01, 4.079e+00, 3.230e-01, 6.000e-02],
+    [2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00, 3.000e+00,
+        5.000e-02, 7.987e+01, 1.026e+02, 0.000e+00, 7.630e+01, 0.000e+00,
+        nan, 0.000e+00, 5.300e+00, 0.000e+00, 4.360e+01, 0.000e+00,
+        5.090e-01, 1.920e+00, 2.150e-01, 5.000e-02]])
+dtypes = [
+    'int64', 'int64', 'int64', 'int64', 'int64', 'int64', 'float64',
+    'float64', 'float64', 'int64', 'float64', 'int64', 'float64', 'int64',
+    'float64', 'int64', 'float64', 'int64', 'float64', 'float64',
+    'float64', 'float64']
+
+columns_mad = [
+    'year', 'julian_day', 'month', 'day', 'hour', 'minute', 'decimal_time',
+    'solar_zenith', 'ghi', 'ghi_flag', 'dni', 'dni_flag', 'dhi', 'dhi_flag',
+    'uvb', 'uvb_flag', 'uvb_temp', 'uvb_temp_flag', 'dpir', 'dpir_flag',
+    'dpirc', 'dpirc_flag', 'dpird', 'dpird_flag', 'std_dw_psp',
+    'std_direct', 'std_diffuse', 'std_uvb', 'std_dpir', 'std_dpirc',
+    'std_dpird']
+values_mad = np.array([
+    [ 2.019e+03,  5.600e+01,  2.000e+00,  2.500e+01,  0.000e+00,
+      0.000e+00,  0.000e+00,  9.428e+01, -2.300e+00,  0.000e+00,
+      0.000e+00,  0.000e+00,  4.000e-01,  0.000e+00,        nan,
+      1.000e+00,        nan,  1.000e+00,  1.872e+02,  0.000e+00,
+      2.656e+02,  0.000e+00,  2.653e+02,  0.000e+00,  0.000e+00,
+      0.000e+00,  0.000e+00,        nan,  2.000e-03,  2.600e+01,
+      2.700e+01],
+    [ 2.019e+03,  5.600e+01,  2.000e+00,  2.500e+01,  0.000e+00,
+      1.000e+00,  1.700e-02,  9.446e+01, -2.300e+00,  0.000e+00,
+      0.000e+00,  0.000e+00,  1.000e-01,  0.000e+00,        nan,
+      1.000e+00,        nan,  1.000e+00,  1.882e+02,  0.000e+00,
+      2.656e+02,  0.000e+00,  2.653e+02,  0.000e+00,  1.330e-01,
+      1.280e-01,  2.230e-01,        nan,  1.000e-03,  2.600e+01,
+      7.200e+01],
+    [ 2.019e+03,  5.600e+01,  2.000e+00,  2.500e+01,  0.000e+00,
+      2.000e+00,  3.300e-02,  9.464e+01, -2.700e+00,  0.000e+00,
+     -2.000e-01,  0.000e+00,  0.000e+00,  0.000e+00,        nan,
+      1.000e+00,        nan,  1.000e+00,  1.876e+02,  0.000e+00,
+      2.656e+02,  0.000e+00,  2.653e+02,  0.000e+00,  0.000e+00,
+      2.570e-01,  0.000e+00,        nan,  1.000e-03,  2.400e+01,
+      4.200e+01],
+    [ 2.019e+03,  5.600e+01,  2.000e+00,  2.500e+01,  0.000e+00,
+      3.000e+00,  5.000e-02,  9.482e+01, -2.500e+00,  0.000e+00,
+      4.000e-01,  0.000e+00,  0.000e+00,  0.000e+00,        nan,
+      1.000e+00,        nan,  1.000e+00,  1.873e+02,  0.000e+00,
+      2.656e+02,  0.000e+00,  2.653e+02,  0.000e+00,  2.660e-01,
+      3.850e-01,  0.000e+00,        nan,  1.000e-03,  2.600e+01,
+      4.800e+01]])
+dtypes_mad = [
+    'int64', 'int64', 'int64', 'int64', 'int64', 'int64', 'float64', 'float64',
+    'float64', 'int64', 'float64', 'int64', 'float64', 'int64', 'float64',
+    'int64', 'float64', 'int64', 'float64', 'int64', 'float64', 'int64',
+    'float64', 'int64', 'float64', 'float64', 'float64', 'float64', 'float64',
+    'float64', 'float64']
+
+
+@pytest.mark.parametrize('testfile,index,columns,values,dtypes', [
+    (testfile, index, columns, values, dtypes),
+    (testfile_mad, index, columns_mad, values_mad, dtypes_mad)
+])
+def test_read_solrad(testfile, index, columns, values, dtypes):
+    expected = pd.DataFrame(values, columns=columns, index=index)
+    for (col, _dtype) in zip(expected.columns, dtypes):
+        expected[col] = expected[col].astype(_dtype)
+    out = solrad.read_solrad(testfile)
+    assert_frame_equal(out, expected)