Skip to content

Commit 4c9196a

Browse files
committed
ENH: Add read support for Google Cloud Storage.
1 parent f483321 commit 4c9196a

File tree

2 files changed

+58
-0
lines changed

2 files changed

+58
-0
lines changed

pandas/io/common.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,14 @@ def is_s3_url(url):
168168
return False
169169

170170

171+
def is_google_cloud_storage_url(url):
    """
    Check whether ``url`` uses the Google Cloud Storage ``gs`` scheme.

    Parameters
    ----------
    url : object
        Candidate location. Usually a string, but any object is accepted.

    Returns
    -------
    bool
        True when ``parse_url(url).scheme`` equals ``'gs'``; False when the
        scheme differs or when ``url`` cannot be parsed at all.
    """
    try:
        return parse_url(url).scheme == 'gs'
    except Exception:
        # Inputs that cannot be parsed as a URL are simply "not a gs url".
        # Catch Exception instead of using a bare except so that
        # KeyboardInterrupt and SystemExit still propagate to the caller.
        return False
177+
178+
171179
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
172180
compression=None, mode=None):
173181
"""
@@ -203,6 +211,14 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
203211
compression=compression,
204212
mode=mode)
205213

214+
if is_google_cloud_storage_url(filepath_or_buffer):
215+
from pandas.io import google_cloud_storage
216+
return google_cloud_storage.get_filepath_or_buffer(
217+
filepath_or_buffer,
218+
encoding=encoding,
219+
compression=compression,
220+
mode=mode)
221+
206222
if isinstance(filepath_or_buffer, (compat.string_types,
207223
compat.binary_type,
208224
mmap.mmap)):

pandas/io/google_cloud_storage.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
""" Google Cloud Storage support for remote file interactivity """
2+
from io import BytesIO
3+
from pandas import compat
4+
try:
5+
from google.cloud.storage import Client
6+
except:
7+
raise ImportError("The google-cloud-storage library is required to "
8+
"read gs:// files")
9+
10+
if compat.PY3:
11+
from urllib.parse import urlparse as parse_url
12+
else:
13+
from urlparse import urlparse as parse_url
14+
15+
16+
def _get_bucket_name(url):
17+
"""Returns the bucket name from the gs:// url"""
18+
result = parse_url(url)
19+
return result.netloc
20+
21+
22+
def _get_object_path(url):
23+
"""Returns the object path from the gs:// url"""
24+
result = parse_url(url)
25+
return result.path.lstrip('/')
26+
27+
28+
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None, mode=None):
    """
    Download a ``gs://`` object into an in-memory buffer.

    Parameters
    ----------
    filepath_or_buffer : str
        URL of the form ``gs://<bucket>/<object path>``.
    encoding : str, optional
        Accepted so the signature matches the caller's keyword arguments;
        not used here.
    compression : str, optional
        Passed back to the caller unchanged.
    mode : str, optional
        Accepted so the signature matches the caller's keyword arguments;
        not used here, the object is always downloaded as bytes.

    Returns
    -------
    tuple
        ``(buffer, None, compression)`` where ``buffer`` is a ``BytesIO``
        holding the downloaded object, positioned at its start.
    """
    client = Client()
    # Client.bucket() builds the bucket handle locally, whereas
    # Client.get_bucket() first issues a bucket-metadata request, which
    # costs a round trip and needs bucket-level read permission that is
    # not required just to download an object.
    bucket = client.bucket(_get_bucket_name(filepath_or_buffer))
    blob = bucket.blob(_get_object_path(filepath_or_buffer))

    data = BytesIO()
    blob.download_to_file(data)
    # Rewind so that readers start at the first downloaded byte.
    data.seek(0)

    return data, None, compression

0 commit comments

Comments
 (0)