Skip to content

Commit af355bd

Browse files
wholmgrencwhanse
authored andcommitted
add SOLRAD data parser to iotools (#667)
* not working solrad * working solrad parser * add tests, dtypes, madison * add api doc * manually set nan values * use pardir. add data_dir to conftest * unused input. silence stickler E226 (whitespace)
1 parent 437be18 commit af355bd

File tree

9 files changed

+250
-1
lines changed

9 files changed

+250
-1
lines changed

.stickler.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ linters:
22
flake8:
33
python: 3
44
max-line-length: 79
5-
ignore: E201,E241
5+
ignore: E201,E241,E226
66
files:
77
ignore:
88
- 'pvlib/_version.py'

docs/sphinx/source/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ relevant to solar energy modeling.
338338
iotools.read_ecmwf_macc
339339
iotools.get_ecmwf_macc
340340
iotools.read_crn
341+
iotools.read_solrad
341342

342343
A :py:class:`~pvlib.location.Location` object may be created from metadata
343344
in some files.

docs/sphinx/source/whatsnew/v0.6.2.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ API Changes
1919
Enhancements
2020
~~~~~~~~~~~~
2121
* Add US CRN data reader to `pvlib.iotools`.
22+
* Add SOLRAD data reader to `pvlib.iotools`.
2223

2324
Bug fixes
2425
~~~~~~~~~

pvlib/data/abq19056.dat

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Albuquerque
2+
35.03796 -106.62211 1617 -7 version 1
3+
2019 56 2 25 0 0 0.000 79.30 104.5 0 60.5 0 97.8 0 5.9 0 43.6 0 0.382 2.280 0.431 0.066
4+
2019 56 2 25 0 1 0.017 79.49 102.6 0 59.7 0 96.2 0 5.7 0 43.6 0 0.764 1.800 0.431 0.063
5+
2019 56 2 25 0 2 0.033 79.68 102.1 0 65.8 0 94.8 0 5.5 0 43.6 0 0.382 4.079 0.323 0.062
6+
2019 56 2 25 0 3 0.050 79.87 102.6 0 76.3 0 -9999.9 0 5.3 0 43.6 0 0.509 1.920 0.215 0.059

pvlib/data/msn19056.dat

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Madison
2+
43.07250 -89.41133 271 -6 version 1
3+
2019 56 2 25 0 0 0.000 94.28 -2.3 0 0.0 0 0.4 0 -9999.9 1 -9999.9 1 187.2 0 265.6 0 265.3 0 0.000 0.000 0.000 -9999.900 0.002 26.000 27.000
4+
2019 56 2 25 0 1 0.017 94.46 -2.3 0 0.0 0 0.1 0 -9999.9 1 -9999.9 1 188.2 0 265.6 0 265.3 0 0.133 0.128 0.223 -9999.900 0.001 26.000 72.000
5+
2019 56 2 25 0 2 0.033 94.64 -2.7 0 -0.2 0 0.0 0 -9999.9 1 -9999.9 1 187.6 0 265.6 0 265.3 0 0.000 0.257 0.000 -9999.900 0.001 24.000 42.000
6+
2019 56 2 25 0 3 0.050 94.82 -2.5 0 0.4 0 0.0 0 -9999.9 1 -9999.9 1 187.3 0 265.6 0 265.3 0 0.266 0.385 0.000 -9999.900 0.001 26.000 48.000

pvlib/iotools/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@
88
from pvlib.iotools.ecmwf_macc import read_ecmwf_macc # noqa: F401
99
from pvlib.iotools.ecmwf_macc import get_ecmwf_macc # noqa: F401
1010
from pvlib.iotools.crn import read_crn # noqa: F401
11+
from pvlib.iotools.solrad import read_solrad # noqa: F401

pvlib/iotools/solrad.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
"""Functions to read data from the NOAA SOLRAD network.
2+
"""
3+
4+
import numpy as np
5+
import pandas as pd
6+
7+
# pvlib conventions
8+
BASE_HEADERS = (
    # date/time fields and solar position
    'year', 'julian_day', 'month', 'day', 'hour', 'minute',
    'decimal_time', 'solar_zenith',
    # each measurement column is followed by its flag column
    'ghi', 'ghi_flag',
    'dni', 'dni_flag',
    'dhi', 'dhi_flag',
    'uvb', 'uvb_flag',
    'uvb_temp', 'uvb_temp_flag',
)

# names of the remaining columns follow README_SOLRAD.txt
STD_HEADERS = ('std_dw_psp', 'std_direct', 'std_diffuse', 'std_uvb')

# column names of the standard (non-Madison) layout
HEADERS = BASE_HEADERS + STD_HEADERS

DPIR_HEADERS = (
    'dpir', 'dpir_flag',
    'dpirc', 'dpirc_flag',
    'dpird', 'dpird_flag',
)

# Madison layout: the dpir columns sit between the base and std columns
# and three additional std columns are appended at the end
MADISON_HEADERS = BASE_HEADERS + DPIR_HEADERS + STD_HEADERS + (
    'std_dpir', 'std_dpirc', 'std_dpird')


def _contiguous(field_widths):
    """Return read_fwf widths for fields separated by one space.

    pandas.read_fwf needs contiguous fields, so every field is widened by
    one character to absorb the separator, except the last one.
    """
    padded = [width + 1 for width in field_widths]
    # NOTE(review): the sample file's last column reads 0.066 while the
    # test fixture expects 0.06, which suggests this trims one character
    # too many -- confirm against the raw file layout before changing.
    padded[-1] -= 1
    return padded


# field widths as specified in README_SOLRAD.txt (separators excluded)
WIDTHS = _contiguous([4, 3] + [2] * 4 + [6, 6] + [7, 1] * 5 + [9] * 4)
MADISON_WIDTHS = _contiguous([4, 3] + [2] * 4 + [6, 6] + [7, 1] * 8 + [9] * 7)

# dtypes are positional and must stay aligned with the header tuples above
_VALUE_AND_FLAG = ['float64', 'int64']
_COMMON_DTYPES = ['int64'] * 6 + ['float64'] * 2 + _VALUE_AND_FLAG * 5

DTYPES = _COMMON_DTYPES + ['float64'] * 4

MADISON_DTYPES = _COMMON_DTYPES + _VALUE_AND_FLAG * 3 + ['float64'] * 7
48+
49+
50+
def read_solrad(filename):
51+
"""
52+
Read NOAA SOLRAD [1]_ [2]_ fixed-width file into pandas dataframe.
53+
54+
Parameters
55+
----------
56+
filename: str
57+
filepath or url to read for the fixed-width file.
58+
59+
Returns
60+
-------
61+
data: Dataframe
62+
A dataframe with DatetimeIndex and all of the variables in the
63+
file.
64+
65+
Notes
66+
-----
67+
SOLRAD data resolution is described by the README_SOLRAD.txt:
68+
"Before 1-jan. 2015 the data were reported as 3-min averages;
69+
on and after 1-Jan. 2015, SOLRAD data are reported as 1-min.
70+
averages of 1-sec. samples."
71+
Here, missing data is flagged as NaN, rather than -9999.9.
72+
73+
References
74+
----------
75+
.. [1] NOAA SOLRAD Network
76+
`https://www.esrl.noaa.gov/gmd/grad/solrad/index.html
77+
<https://www.esrl.noaa.gov/gmd/grad/solrad/index.html>`_
78+
79+
.. [2] B. B. Hicks et. al., (1996), The NOAA Integrated Surface
80+
Irradiance Study (ISIS). A New Surface Radiation Monitoring
81+
Program. Bull. Amer. Meteor. Soc., 77, 2857-2864.
82+
:doi:`10.1175/1520-0477(1996)077<2857:TNISIS>2.0.CO;2`
83+
"""
84+
if 'msn' in filename:
85+
names = MADISON_HEADERS
86+
widths = MADISON_WIDTHS
87+
dtypes = MADISON_DTYPES
88+
else:
89+
names = HEADERS
90+
widths = WIDTHS
91+
dtypes = DTYPES
92+
93+
# read in data
94+
data = pd.read_fwf(filename, header=None, skiprows=2, names=names,
95+
widths=widths, na_values=-9999.9)
96+
97+
# loop here because dtype kwarg not supported in read_fwf until 0.20
98+
for (col, _dtype) in zip(data.columns, dtypes):
99+
ser = data[col].astype(_dtype)
100+
if _dtype == 'float64':
101+
# older verions of pandas/numpy read '-9999.9' as
102+
# -9999.8999999999996 and fail to set nan in read_fwf,
103+
# so manually set nan
104+
ser = ser.where(ser > -9999, other=np.nan)
105+
data[col] = ser
106+
107+
# set index
108+
# columns do not have leading 0s, so must zfill(2) to comply
109+
# with %m%d%H%M format
110+
dts = data[['month', 'day', 'hour', 'minute']].astype(str).apply(
111+
lambda x: x.str.zfill(2))
112+
dtindex = pd.to_datetime(
113+
data['year'].astype(str) + dts['month'] + dts['day'] + dts['hour'] +
114+
dts['minute'], format='%Y%m%d%H%M', utc=True)
115+
data = data.set_index(dtindex)
116+
try:
117+
# to_datetime(utc=True) does not work in older versions of pandas
118+
data = data.tz_localize('UTC')
119+
except TypeError:
120+
pass
121+
122+
return data

pvlib/test/conftest.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import inspect
2+
import os
13
import platform
24

35
import numpy as np
@@ -33,6 +35,12 @@ def inner():
3335
return wrapper
3436

3537

38+
# commonly used directories in the tests
39+
# path of this module, resolved through the currently executing frame
_conftest_file = inspect.getfile(inspect.currentframe())
test_dir = os.path.dirname(os.path.abspath(_conftest_file))
# the data directory is a sibling of the test directory
data_dir = os.path.join(test_dir, os.pardir, 'data')
42+
43+
3644
has_python2 = parse_version(platform.python_version()) < parse_version('3')
3745

3846
platform_is_windows = platform.system() == 'Windows'

pvlib/test/test_solrad.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import os
2+
3+
import pandas as pd
4+
from pandas.util.testing import assert_frame_equal
5+
import numpy as np
6+
from numpy import nan
7+
8+
import pytest
9+
10+
from pvlib.iotools import solrad
11+
from conftest import data_dir
12+
13+
14+
# sample SOLRAD files: one standard-layout station and the Madison station
testfile, testfile_mad = [
    os.path.join(data_dir, fname)
    for fname in ('abq19056.dat', 'msn19056.dat')]
16+
17+
18+
columns = [
    'year', 'julian_day', 'month', 'day', 'hour', 'minute',
    'decimal_time', 'solar_zenith',
    'ghi', 'ghi_flag', 'dni', 'dni_flag', 'dhi', 'dhi_flag',
    'uvb', 'uvb_flag', 'uvb_temp', 'uvb_temp_flag',
    'std_dw_psp', 'std_direct', 'std_diffuse', 'std_uvb']
# the four one-minute records present in both sample files
index = pd.DatetimeIndex(
    ['2019-02-25 00:0{}:00'.format(minute) for minute in range(4)],
    freq=None).tz_localize('UTC')
# NOTE(review): std_uvb (last column) is 0.066/0.063/0.062/0.059 in
# abq19056.dat, but the expectations below hold 0.06/0.06/0.06/0.05 --
# presumably what the reader returns; confirm the lost digit is intended.
values = np.array([
    [2019, 56, 2, 25, 0, 0, 0.000, 79.30,
     104.5, 0, 60.5, 0, 97.8, 0, 5.9, 0, 43.6, 0,
     0.382, 2.280, 0.431, 0.06],
    [2019, 56, 2, 25, 0, 1, 0.017, 79.49,
     102.6, 0, 59.7, 0, 96.2, 0, 5.7, 0, 43.6, 0,
     0.764, 1.800, 0.431, 0.06],
    [2019, 56, 2, 25, 0, 2, 0.033, 79.68,
     102.1, 0, 65.8, 0, 94.8, 0, 5.5, 0, 43.6, 0,
     0.382, 4.079, 0.323, 0.06],
    [2019, 56, 2, 25, 0, 3, 0.050, 79.87,
     102.6, 0, 76.3, 0, nan, 0, 5.3, 0, 43.6, 0,
     0.509, 1.920, 0.215, 0.05]])
# dtypes are positional and line up with ``columns``
dtypes = 6 * ['int64'] + 2 * ['float64'] + 5 * ['float64', 'int64']
dtypes += 4 * ['float64']

# Madison adds the dpir value/flag pairs after the base columns and three
# extra std columns at the end
columns_mad = columns[:18] + [
    'dpir', 'dpir_flag', 'dpirc', 'dpirc_flag', 'dpird', 'dpird_flag'
] + columns[18:] + ['std_dpir', 'std_dpirc', 'std_dpird']
values_mad = np.array([
    [2019, 56, 2, 25, 0, 0, 0.000, 94.28,
     -2.3, 0, 0.0, 0, 0.4, 0, nan, 1, nan, 1,
     187.2, 0, 265.6, 0, 265.3, 0,
     0.000, 0.000, 0.000, nan, 0.002, 26.0, 27.0],
    [2019, 56, 2, 25, 0, 1, 0.017, 94.46,
     -2.3, 0, 0.0, 0, 0.1, 0, nan, 1, nan, 1,
     188.2, 0, 265.6, 0, 265.3, 0,
     0.133, 0.128, 0.223, nan, 0.001, 26.0, 72.0],
    [2019, 56, 2, 25, 0, 2, 0.033, 94.64,
     -2.7, 0, -0.2, 0, 0.0, 0, nan, 1, nan, 1,
     187.6, 0, 265.6, 0, 265.3, 0,
     0.000, 0.257, 0.000, nan, 0.001, 24.0, 42.0],
    [2019, 56, 2, 25, 0, 3, 0.050, 94.82,
     -2.5, 0, 0.4, 0, 0.0, 0, nan, 1, nan, 1,
     187.3, 0, 265.6, 0, 265.3, 0,
     0.266, 0.385, 0.000, nan, 0.001, 26.0, 48.0]])
dtypes_mad = dtypes[:18] + 3 * ['float64', 'int64'] + 7 * ['float64']
93+
94+
95+
@pytest.mark.parametrize('testfile,index,columns,values,dtypes', [
    (testfile, index, columns, values, dtypes),
    (testfile_mad, index, columns_mad, values_mad, dtypes_mad)
])
def test_read_solrad(testfile, index, columns, values, dtypes):
    """Each sample file must parse into the hand-built expected frame."""
    expected = pd.DataFrame(values, columns=columns, index=index)
    # ``values`` is a single float array, so cast every column back to
    # the dtype the reader is expected to produce
    for name, kind in zip(expected.columns, dtypes):
        expected[name] = expected[name].astype(kind)
    assert_frame_equal(solrad.read_solrad(testfile), expected)

0 commit comments

Comments
 (0)