Skip to content

Commit 437be18

Browse files
wholmgren authored and cwhanse committed
add CRN file parser (#666)
* initial implementation * add crn file reader * add unused cols * add to api.rst * better dtype handling * pandas to 0.16. remove py 3.3 classifier * maybe avoid issue with tz dtype specific to travis * use fixed width parsing * style * unused import * more style and doc issues
1 parent 1ecef34 commit 437be18

File tree

8 files changed

+169
-4
lines changed

8 files changed

+169
-4
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ install:
6969
pip uninstall numpy --yes;
7070
pip uninstall pandas --yes;
7171
pip install --no-cache-dir numpy==1.10.1;
72-
pip install --no-cache-dir pandas==0.15.0;
72+
pip install --no-cache-dir pandas==0.16.0;
7373
fi
7474
- conda list
7575
- echo $PATH

docs/sphinx/source/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ relevant to solar energy modeling.
337337
iotools.read_midc_raw_data_from_nrel
338338
iotools.read_ecmwf_macc
339339
iotools.get_ecmwf_macc
340+
iotools.read_crn
340341

341342
A :py:class:`~pvlib.location.Location` object may be created from metadata
342343
in some files.

docs/sphinx/source/whatsnew/v0.6.2.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,16 @@ release.
99
**Python 2.7 support will end on June 1, 2019**. Releases made after this
1010
date will require Python 3. (:issue:`501`)
1111

12+
**Minimum pandas requirement bumped 0.15.0=>0.16.0**
13+
1214

1315
API Changes
1416
~~~~~~~~~~~
1517

1618

1719
Enhancements
1820
~~~~~~~~~~~~
19-
21+
* Add US CRN data reader to `pvlib.iotools`.
2022

2123
Bug fixes
2224
~~~~~~~~~
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
53131 20190101 1610 20190101 0910 3 -111.17 32.24 -9999.0 0.0 296 0 4.4 C 0 90 0 -99.000 -9999.0 24 0 0.78 0
2+
53131 20190101 1615 20190101 0915 3 -111.17 32.24 3.3 0.0 183 0 4.0 C 0 87 0 -99.000 -9999.0 1182 0 0.36 0
3+
53131 20190101 1620 20190101 0920 3 -111.17 32.24 3.5 0.0 340 0 4.3 C 0 83 0 -99.000 -9999.0 1183 0 0.53 0
4+
53131 20190101 1625 20190101 0925 3 -111.17 32.24 4.0 0.0 393 0 4.8 C 0 81 0 -99.000 -9999.0 1223 0 0.64 0

pvlib/iotools/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@
77
from pvlib.iotools.midc import read_midc_raw_data_from_nrel # noqa: F401
88
from pvlib.iotools.ecmwf_macc import read_ecmwf_macc # noqa: F401
99
from pvlib.iotools.ecmwf_macc import get_ecmwf_macc # noqa: F401
10+
from pvlib.iotools.crn import read_crn # noqa: F401

pvlib/iotools/crn.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
"""Functions to read data from the US Climate Reference Network (CRN).
2+
"""
3+
4+
import pandas as pd
5+
import numpy as np
6+
7+
8+
# Column names of a CRN sub-hourly file, in file order (space separated).
HEADERS = (
    'WBANNO UTC_DATE UTC_TIME LST_DATE LST_TIME CRX_VN LONGITUDE LATITUDE '
    'AIR_TEMPERATURE PRECIPITATION SOLAR_RADIATION SR_FLAG '
    'SURFACE_TEMPERATURE ST_TYPE ST_FLAG RELATIVE_HUMIDITY RH_FLAG '
    'SOIL_MOISTURE_5 SOIL_TEMPERATURE_5 WETNESS WET_FLAG WIND_1_5 WIND_FLAG'
)

# CRN column name -> standard pvlib variable name. Columns that are not
# listed here keep their CRN name.
VARIABLE_MAP = {
    'LONGITUDE': 'longitude',
    'LATITUDE': 'latitude',
    'AIR_TEMPERATURE': 'temp_air',
    'SOLAR_RADIATION': 'ghi',
    'SR_FLAG': 'ghi_flag',
    'RELATIVE_HUMIDITY': 'relative_humidity',
    'RH_FLAG': 'relative_humidity_flag',
    'WIND_1_5': 'wind_speed',
    'WIND_FLAG': 'wind_speed_flag'
}

# as specified in CRN README.txt file. excludes 1 space between columns
WIDTHS = [5, 8, 4, 8, 4, 6, 7, 7, 7, 7, 6, 1, 7, 1, 1, 5, 1, 7, 7, 5, 1, 6, 1]
# add 1 to make fields contiguous (required by pandas.read_fwf)
WIDTHS = [w + 1 for w in WIDTHS]
# no space after last column
WIDTHS[-1] -= 1

# specify dtypes for potentially problematic values
DTYPES = [
    'int64', 'int64', 'int64', 'int64', 'int64', 'int64', 'float64', 'float64',
    'float64', 'float64', 'float64', 'int64', 'float64', 'O', 'int64',
    'float64', 'int64', 'float64', 'float64', 'int64', 'int64', 'float64',
    'int64'
]

# Values that mark missing data: the lowest possible integer for a field.
# -99999 covers the 6-character fields (e.g. SOLAR_RADIATION).
_MISSING_VALUES = (-99, -999, -9999, -99999)

# Record identification and station coordinate columns. They are excluded
# from missing-value replacement so that a legitimate value which happens
# to equal a missing-data marker (e.g. a longitude of -99.0) is preserved.
# Must be a list (not a tuple) so pandas treats it as several column keys.
_NEVER_MISSING_COLUMNS = [
    'WBANNO', 'UTC_DATE', 'UTC_TIME', 'LST_DATE', 'LST_TIME',
    'LONGITUDE', 'LATITUDE'
]


def read_crn(filename):
    """
    Read NOAA USCRN [1]_ [2]_ fixed-width file into pandas dataframe.

    Parameters
    ----------
    filename: str
        filepath or url to read for the fixed-width file.

    Returns
    -------
    data: DataFrame
        A dataframe with a UTC DatetimeIndex and all of the variables in
        the file.

    Notes
    -----
    CRN files contain 5 minute averages labeled by the interval ending
    time. Here, missing data is flagged as NaN, rather than the lowest
    possible integer for a field (e.g. -99, -999, -9999 or -99999).
    Station identifier, date/time and coordinate columns are never
    treated as missing, so e.g. a longitude of -99.0 is returned as is.
    Air temperature in deg C. Wind speed in m/s at a height of 1.5 m
    above ground level.

    Variables corresponding to standard pvlib variables are renamed,
    e.g. `SOLAR_RADIATION` becomes `ghi`. See the
    `pvlib.iotools.crn.VARIABLE_MAP` dict for the complete mapping.

    References
    ----------
    .. [1] U.S. Climate Reference Network
       `https://www.ncdc.noaa.gov/crn/qcdatasets.html
       <https://www.ncdc.noaa.gov/crn/qcdatasets.html>`_

    .. [2] Diamond, H. J. et al., 2013: U.S. Climate Reference Network
       after one decade of operations: status and assessment. Bull.
       Amer. Meteor. Soc., 94, 489-498. :doi:`10.1175/BAMS-D-12-00170.1`
    """

    # read in data
    data = pd.read_fwf(filename, header=None, names=HEADERS.split(' '),
                       widths=WIDTHS)
    # loop here because dtype kwarg not supported in read_fwf until 0.20
    for (col, _dtype) in zip(data.columns, DTYPES):
        data[col] = data[col].astype(_dtype)

    # set index
    # UTC_TIME does not have leading 0s, so must zfill(4) to comply
    # with %H%M format
    dts = data[['UTC_DATE', 'UTC_TIME']].astype(str)
    dtindex = pd.to_datetime(dts['UTC_DATE'] + dts['UTC_TIME'].str.zfill(4),
                             format='%Y%m%d%H%M', utc=True)
    data = data.set_index(dtindex)
    # to_datetime(utc=True) does not work in older versions of pandas and
    # leaves the index naive; localize only in that case. An index that is
    # already tz-aware must not be localized again.
    if isinstance(data.index, pd.DatetimeIndex) and data.index.tz is None:
        data = data.tz_localize('UTC')

    # set nans, but never in the identification/coordinate columns
    for val in _MISSING_VALUES:
        keep = data != val
        keep[_NEVER_MISSING_COLUMNS] = True
        data = data.where(keep, np.nan)

    data = data.rename(columns=VARIABLE_MAP)

    return data

pvlib/test/test_crn.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import inspect
2+
import os
3+
4+
import pandas as pd
5+
from pandas.util.testing import assert_frame_equal
6+
import numpy as np
7+
from numpy import dtype, nan
8+
9+
from pvlib.iotools import crn
10+
11+
12+
# Locate the sample CRN file relative to this test module.
test_dir = os.path.dirname(os.path.abspath(
    inspect.getfile(inspect.currentframe())))
testfile = os.path.join(
    test_dir, '../data/CRNS0101-05-2019-AZ_Tucson_11_W.txt')


def test_read_crn():
    """Check read_crn output for the sample file against a hand-built frame.

    The expected frame has a UTC DatetimeIndex, pvlib names for the mapped
    columns, NaN where the file holds a missing-data marker, and the
    per-column dtypes listed below.
    """
    expected_columns = [
        'WBANNO', 'UTC_DATE', 'UTC_TIME', 'LST_DATE', 'LST_TIME', 'CRX_VN',
        'longitude', 'latitude', 'temp_air', 'PRECIPITATION', 'ghi',
        'ghi_flag',
        'SURFACE_TEMPERATURE', 'ST_TYPE', 'ST_FLAG', 'relative_humidity',
        'relative_humidity_flag', 'SOIL_MOISTURE_5', 'SOIL_TEMPERATURE_5',
        'WETNESS', 'WET_FLAG', 'wind_speed', 'wind_speed_flag']

    stamps = ['2019-01-01 16:{:02d}:00'.format(minute)
              for minute in (10, 15, 20, 25)]
    expected_index = pd.DatetimeIndex(stamps, freq=None).tz_localize('UTC')

    # Only these fields differ between the four records of the sample file:
    # (UTC_TIME, LST_TIME, temp_air, ghi, SURFACE_TEMPERATURE,
    #  relative_humidity, WETNESS, wind_speed)
    varying = [
        (1610, 910, nan, 296.0, 4.4, 90.0, 24, 0.78),
        (1615, 915, 3.3, 183.0, 4.0, 87.0, 1182, 0.36),
        (1620, 920, 3.5, 340.0, 4.3, 83.0, 1183, 0.53),
        (1625, 925, 4.0, 393.0, 4.8, 81.0, 1223, 0.64),
    ]
    rows = [
        [53131, 20190101, utc, 20190101, lst, 3, -111.17, 32.24, t_air,
         0.0, ghi, 0, t_surf, 'C', 0, rh, 0, nan, nan, wet, 0, wind, 0]
        for (utc, lst, t_air, ghi, t_surf, rh, wet, wind) in varying
    ]
    values = np.array(rows)

    dtype_codes = (
        ['int64'] * 6 + ['float64'] * 5 +
        ['int64', 'float64', 'O', 'int64', 'float64', 'int64',
         'float64', 'float64', 'int64', 'int64', 'float64', 'int64'])
    expected_dtypes = [dtype(code) for code in dtype_codes]

    expected = pd.DataFrame(values, columns=expected_columns,
                            index=expected_index)
    # cast column by column to the dtype read_crn is expected to return
    for (name, kind) in zip(expected.columns, expected_dtypes):
        expected[name] = expected[name].astype(kind)

    result = crn.read_crn(testfile)
    assert_frame_equal(result, expected)

setup.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
URL = 'https://github.com/pvlib/pvlib-python'
3939

4040
INSTALL_REQUIRES = ['numpy >= 1.10.1',
41-
'pandas >= 0.15.0',
41+
'pandas >= 0.16.0',
4242
'pytz',
4343
'six',
4444
]
@@ -61,7 +61,6 @@
6161
'Programming Language :: Python :: 2',
6262
'Programming Language :: Python :: 2.7',
6363
'Programming Language :: Python :: 3',
64-
'Programming Language :: Python :: 3.3',
6564
'Programming Language :: Python :: 3.4',
6665
'Programming Language :: Python :: 3.5',
6766
'Programming Language :: Python :: 3.6',

0 commit comments

Comments
 (0)