Skip to content

Commit d6dbd2e

Browse files
committed
backport msgpack check fix from python-scrapinghub
scrapinghub/python-scrapinghub#31
1 parent 86302f2 commit d6dbd2e

File tree

4 files changed: 103 additions and 7 deletions.

hubstorage/collectionsrt.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,43 @@
11
import re
2+
23
from requests.exceptions import HTTPError
4+
35
from .resourcetype import DownloadableResource
6+
from .serialization import MSGPACK_AVAILABLE
47
from .utils import urlpathjoin
58

9+
COLLECTIONS_MSGPACK_REGEX = re.compile(
10+
r"""(v?c?s) # collection type
11+
/\w+ # collection name
12+
(
13+
/? # no key
14+
| # OR
15+
/(?P<key>[^/]+)/? # item key
16+
)
17+
$
18+
""",
19+
re.VERBOSE)
20+
621

722
class Collections(DownloadableResource):
823

924
resource_type = 'collections'
1025

26+
def _allows_mpack(self, path=None):
27+
"""Check if request can be served with msgpack data.
28+
29+
Collection scan and get requests for keys are able to return msgpack data.
30+
31+
:param path: None, tuple or string
32+
33+
"""
34+
if not MSGPACK_AVAILABLE:
35+
return False
36+
path = urlpathjoin(path or '')
37+
match = COLLECTIONS_MSGPACK_REGEX.match(path)
38+
# count endpoint doesn't support msgpack
39+
return bool(match and match.group('key') != 'count')
40+
1141
def get(self, _type, _name, _key=None, **params):
1242
try:
1343
r = self.apiget((_type, _name, _key), params=params)

hubstorage/resourcetype.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,22 @@ def __init__(self, client, key, auth=None):
2323
self.url = urlpathjoin(client.endpoint, self.key)
2424

2525
def _allows_mpack(self, path=None):
26-
""" Check if request can be served with msgpack data.
26+
"""Check if request can be served with msgpack data.
2727
28-
Currently, items, logs, collections and samples endpoints are able to
28+
Currently, items, logs and samples endpoints are able to
2929
return msgpack data. However, /stats calls can only return JSON data
3030
for now.
31+
32+
:param path: None, tuple or string
33+
3134
"""
32-
if not MSGPACK_AVAILABLE or path == 'stats':
35+
if not MSGPACK_AVAILABLE:
3336
return False
34-
return self.resource_type in ('items', 'logs',
35-
'collections', 'samples')
37+
path = urlpathjoin(path or '')
38+
return (
39+
self.resource_type in ('items', 'logs', 'samples') and
40+
not path.rstrip('/').endswith('stats')
41+
)
3642

3743
@staticmethod
3844
def _enforce_msgpack(**kwargs):

tests/test_collections.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,12 @@
22
Test Collections
33
"""
44
import random
5+
6+
import pytest
57
from six.moves import range
68
from contextlib import closing
9+
10+
from hubstorage import HubstorageClient
711
from .hstestcase import HSTestCase
812
from .testutil import failing_downloader
913

@@ -121,3 +125,31 @@ def test_invalid_collection_name(self):
121125
self.assertRaises(ValueError, cols.new_store, '/foo')
122126
self.assertRaises(ValueError, cols.create_writer, 'invalidtype', 'n')
123127
self.assertRaises(ValueError, cols.create_writer, 's', 'foo-bar')
128+
129+
130+
@pytest.mark.parametrize('msgpack_available', [True, False])
131+
@pytest.mark.parametrize('path,expected_result', [
132+
('s/foo', True),
133+
('s/foo/', True),
134+
(('s', 'foo'), True),
135+
('s/foo/bar', True),
136+
('s/foo/bar/', True),
137+
(('s', 'foo', 'bar'), True),
138+
('vs/foo/bar/', True),
139+
('cs/foo/bar/', True),
140+
('vcs/foo/bar/', True),
141+
('s/foo/scan', True),
142+
('s/foo/bar/baz', False),
143+
('s/foo/count', False),
144+
(('s', 'foo', 'count'), False),
145+
('x/foo', False),
146+
(('x', 'foo'), False),
147+
('list', False),
148+
(None, False),
149+
])
150+
def test_allows_msgpack(monkeypatch, msgpack_available, path, expected_result):
151+
monkeypatch.setattr(
152+
'hubstorage.collectionsrt.MSGPACK_AVAILABLE', msgpack_available)
153+
hsclient = HubstorageClient()
154+
collections = hsclient.get_project(2222000).collections
155+
assert collections._allows_mpack(path) is (msgpack_available and expected_result)

tests/test_project.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,16 @@
22
Test Project
33
"""
44
import json
5-
import six
6-
from six.moves import range
75
from random import randint, random
6+
7+
import pytest
8+
import six
89
from requests.exceptions import HTTPError
10+
from six.moves import range
11+
912
from hubstorage import HubstorageClient
1013
from hubstorage.utils import millitime
14+
1115
from .hstestcase import HSTestCase
1216
from .testutil import failing_downloader
1317

@@ -252,3 +256,27 @@ def test_output_string(self):
252256
job.close_writers()
253257
items = self.hsclient.get_job(job.key).items.iter_json()
254258
self.assertEqual(type(next(items)), str)
259+
260+
261+
@pytest.mark.parametrize('msgpack_available', [True, False])
262+
@pytest.mark.parametrize('path,expected_result', [
263+
(None, True),
264+
('33/1', True),
265+
('33/1/', True),
266+
((33, 1), True),
267+
('stats', False),
268+
('stats/', False),
269+
('33/1/stats', False),
270+
('33/1/stats/', False),
271+
((33, 1, 'stats'), False),
272+
])
273+
def test_allows_msgpack(monkeypatch, msgpack_available, path, expected_result):
274+
monkeypatch.setattr(
275+
'hubstorage.resourcetype.MSGPACK_AVAILABLE', msgpack_available)
276+
hsclient = HubstorageClient()
277+
job = hsclient.get_job('2222000/1/1')
278+
for resource in [job.items, job.logs, job.samples]:
279+
assert resource._allows_mpack(path) is (msgpack_available and expected_result)
280+
assert job.requests._allows_mpack(path) is False
281+
assert job.metadata._allows_mpack(path) is False
282+
assert job.jobq._allows_mpack(path) is False

0 commit comments

Comments (0)