Skip to content

Commit 9a578e9

Browse files
committed
read_srml function
1 parent 2087979 commit 9a578e9

File tree

2 files changed

+147
-0
lines changed

2 files changed

+147
-0
lines changed

pvlib/iotools/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
from pvlib.iotools.tmy import read_tmy2 # noqa: F401
22
from pvlib.iotools.tmy import read_tmy3 # noqa: F401
3+
from pvlib.iotools.srml import read_srml # noqa: F401

pvlib/iotools/srml.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
"""Collection of functions to get data from the University of Oregon SRML.
2+
"""
3+
import numpy as np
4+
import pandas as pd
5+
6+
7+
def read_srml(filename):
    """
    Read SRML file into pandas dataframe.

    Parameters
    ----------
    filename: str
        filepath or url to read for the tsv file.

    Returns
    -------
    data: Dataframe
        A dataframe with datetime index and all of the variables listed
        in the `var_map` dict inside of the map_columns function, along
        with their associated quality control flags. Each flag column is
        named after the variable column that precedes it, with a
        ``_flag`` suffix.

    Notes
    -----
    Note that the time index is shifted back one minute to account for
    2400 hours, and to avoid time parsing errors on leap years. Data values
    on a given line should now be understood to occur during the interval
    extending from the time of the line in which they are listed to
    the ending time on the next line, rather than the previous line.

    Values whose quality flag equals 99 are replaced with NaN.
    """
    tsv_data = pd.read_csv(filename, delimiter='\t')
    # The label of the second column is used as the year of the file; the
    # column itself holds the time values consumed by format_index.
    year = tsv_data.columns[1]
    data = format_index(tsv_data, year)
    # Rename and drop datetime columns
    data = data[data.columns[2:]].rename(columns=map_columns)

    # Quality flags are all labeled 0, but occur immediately after their
    # associated var, so the columns alternate (var, flag, var, flag, ...).
    # Pair them positionally to build the flag -> var_flag renaming map.
    flag_label_map = {flag: var + '_flag'
                      for var, flag in zip(data.columns[::2],
                                           data.columns[1::2])}
    data = data.rename(columns=flag_label_map)
    # For data flagged 99 (bad or missing), replace the value with NaN.
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
    for col in data.columns[::2]:
        data[col] = data[col].where(data[col + '_flag'] != 99, np.nan)
    return data
46+
47+
48+
def map_columns(col):
    """Translate an SRML column label into its pvlib name.

    Parameters
    ----------
    col: str
        Column label to be mapped.

    Returns
    -------
    str
        The pvlib label if the element number is known,
        otherwise the label exactly as it was passed in.

    Notes
    -----
    var_map is keyed by SRML data element numbers. For most variables
    the first three characters select the variable and whatever follows
    is appended to the pvlib name after an underscore. Labels starting
    with '7' (spectral data) are looked up using the whole label and
    get no suffix.
    """
    var_map = {
        '100': 'ghi',
        '201': 'dni',
        '300': 'dhi',
        '930': 'temp_air',
        '931': 'temp_dew',
        '933': 'relative_humidity',
        '921': 'wind_speed',
        '920': 'wind_dir',
    }
    spectral = col.startswith('7')
    # Spectral labels are matched whole; everything else by 3-char prefix.
    key = col if spectral else col[:3]
    if key not in var_map:
        return col
    if spectral:
        return var_map[key]
    return var_map[key] + '_' + col[3:]
90+
91+
92+
def format_index(df, year):
    """Build a timezone-aware datetime index from the first two columns.

    Parameters
    ----------
    df: pd.Dataframe
        The srml data to reindex. Its first column is read as the day of
        year and its second column as the time. The index of this
        frame is overwritten in place with the (naive) timestamps.
    year: int or str
        The year of the file; it is converted with ``str()`` before being
        combined with the day of year and time.

    Returns
    -------
    df: pd.Dataframe
        The Dataframe with a datetime index localized to 'Etc/GMT+8'.
    """
    doy_label, time_label = df.columns[0], df.columns[1]
    # Shift every time stamp back by one minute.
    shifted = df[time_label] - 1
    # A value ending in 99 came from a whole hour (e.g. 100 -> 99);
    # subtracting 40 turns it into minute 59 of the previous hour.
    crossed_hour = shifted % 100 == 99
    hhmm = shifted.mask(crossed_hour, shifted - 40).map('{:04.0f}'.format)
    day_of_year = df[doy_label].map('{:03.0f}'.format)
    stamps = pd.to_datetime(str(year) + '-' + day_of_year + '-' + hhmm,
                            format='%Y-%j-%H%M')
    df.index = stamps
    return df.tz_localize('Etc/GMT+8')
118+
119+
120+
def request_uo_data(station, year, month, filetype='PO'):
    """Read a month of SRML data from solardat into a Dataframe.

    Parameters
    ----------
    station: str
        The name of the SRML station to request. It is used verbatim as
        the prefix of the archive file name.
    year: int
        Year to request data for. Only the last two digits are used in
        the file name.
    month: int
        Month to request data for.
    filetype: string
        SRML file type to gather. 'RO' and 'PO' are the
        only minute resolution files.

    Returns
    -------
    data: pd.DataFrame
        One month of data from SRML.
    """
    # Year and month are zero padded to two digits each (e.g. 2018, 1 ->
    # "1801"); a plain width of 2 would pad with spaces and produce an
    # invalid file name for single-digit values.
    file_name = "{station}{filetype}{year:02d}{month:02d}.txt".format(
        station=station,
        filetype=filetype,
        year=year % 100,
        month=month)
    url = "http://solardat.uoregon.edu/download/Archive/"
    data = read_srml(url + file_name)
    return data

0 commit comments

Comments
 (0)