Skip to content

Commit e456071

Browse files
committed
Fix ResourceType._allows_mpack check.
Existing check is broken. It works for items, logs and samples only if one uses them through Job object. However it doesn't work correctly for `list(proj.items.apiget('33/5/stats'))`). Also for collections it applies the same rules as for items, but only few collections endpoints support msgpack - scan, batch scan and get collection item. Other endpoints do no support msgpack.
1 parent ec71641 commit e456071

File tree

10 files changed

+132
-24
lines changed

10 files changed

+132
-24
lines changed

scrapinghub/hubstorage/collectionsrt.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,45 @@
11
import re
2+
23
from requests.exceptions import HTTPError
4+
35
from .resourcetype import DownloadableResource
6+
from .serialization import MSGPACK_AVAILABLE
47
from .utils import urlpathjoin
58

69

10+
COLLECTIONS_MSGPACK_REGEX = re.compile(
11+
r"""(v?c?s) # collection type
12+
/\w+ # collection name
13+
(
14+
/? # no key
15+
| # OR
16+
/(?P<key>[^/]+)/? # item key
17+
)
18+
$
19+
""",
20+
re.VERBOSE)
21+
22+
723
class Collections(DownloadableResource):
824

925
resource_type = 'collections'
1026

27+
def _allows_mpack(self, path=None):
28+
"""Check if request can be served with msgpack data.
29+
30+
Collection scan and get requests for keys are able to return msgpack data.
31+
32+
:param path: None, tuple or string
33+
34+
"""
35+
if not MSGPACK_AVAILABLE:
36+
return False
37+
# path
38+
path = urlpathjoin(path or '')
39+
match = COLLECTIONS_MSGPACK_REGEX.match(path)
40+
# count endpoint doesn't support msgpack
41+
return bool(match and match.group('key') != 'count')
42+
1143
def get(self, _type, _name, _key=None, **params):
1244
try:
1345
r = self.apiget((_type, _name, _key), params=params)

scrapinghub/hubstorage/resourcetype.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,22 @@ def __init__(self, client, key, auth=None):
2323
self.url = urlpathjoin(client.endpoint, self.key)
2424

2525
def _allows_mpack(self, path=None):
26-
""" Check if request can be served with msgpack data.
26+
"""Check if request can be served with msgpack data.
2727
28-
Currently, items, logs, collections and samples endpoints are able to
28+
Currently, items, logs and samples endpoints are able to
2929
return msgpack data. However, /stats calls can only return JSON data
3030
for now.
31+
32+
:param path: None, tuple or string
33+
3134
"""
32-
if not MSGPACK_AVAILABLE or path == 'stats':
35+
if not MSGPACK_AVAILABLE:
3336
return False
34-
return self.resource_type in ('items', 'logs',
35-
'collections', 'samples')
37+
path = urlpathjoin(path or '')
38+
return (
39+
self.resource_type in ('items', 'logs', 'samples') and
40+
not path.rstrip('/').endswith('stats')
41+
)
3642

3743
@staticmethod
3844
def _enforce_msgpack(**kwargs):

tests/hubstorage/hstestcase.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
import unittest
33
import random
44
import requests
5-
from hubstorage import HubstorageClient
6-
from hubstorage.utils import urlpathjoin
5+
6+
from scrapinghub import HubstorageClient
7+
from scrapinghub.hubstorage.utils import urlpathjoin
78

89

910
class HSTestCase(unittest.TestCase):

tests/hubstorage/test_batchuploader.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
from six.moves import range
66
from collections import defaultdict
77
from .hstestcase import HSTestCase
8-
from hubstorage import ValueTooLarge
8+
9+
from scrapinghub.hubstorage import ValueTooLarge
910

1011

1112
class BatchUploaderTest(HSTestCase):

tests/hubstorage/test_client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
Test Client
33
"""
44
from .hstestcase import HSTestCase
5-
from hubstorage import HubstorageClient
6-
from hubstorage.utils import apipoll
5+
from scrapinghub import HubstorageClient
6+
from scrapinghub.hubstorage.utils import apipoll
77

88
class ClientTest(HSTestCase):
99

tests/hubstorage/test_collections.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,12 @@
22
Test Collections
33
"""
44
import random
5-
from six.moves import range
65
from contextlib import closing
6+
7+
import pytest
8+
from scrapinghub import HubstorageClient
9+
from six.moves import range
10+
711
from .hstestcase import HSTestCase
812
from .testutil import failing_downloader
913

@@ -121,3 +125,31 @@ def test_invalid_collection_name(self):
121125
self.assertRaises(ValueError, cols.new_store, '/foo')
122126
self.assertRaises(ValueError, cols.create_writer, 'invalidtype', 'n')
123127
self.assertRaises(ValueError, cols.create_writer, 's', 'foo-bar')
128+
129+
130+
@pytest.mark.parametrize('msgpack_available', [True, False])
131+
@pytest.mark.parametrize('path,expected_result', [
132+
('s/foo', True),
133+
('s/foo/', True),
134+
(('s', 'foo'), True),
135+
('s/foo/bar', True),
136+
('s/foo/bar/', True),
137+
(('s', 'foo', 'bar'), True),
138+
('vs/foo/bar/', True),
139+
('cs/foo/bar/', True),
140+
('vcs/foo/bar/', True),
141+
('s/foo/scan', True),
142+
('s/foo/bar/baz', False),
143+
('s/foo/count', False),
144+
(('s', 'foo', 'count'), False),
145+
('x/foo', False),
146+
(('x', 'foo'), False),
147+
('list', False),
148+
(None, False),
149+
])
150+
def test_allows_msgpack(monkeypatch, msgpack_available, path, expected_result):
151+
monkeypatch.setattr(
152+
'scrapinghub.hubstorage.collectionsrt.MSGPACK_AVAILABLE', msgpack_available)
153+
hsclient = HubstorageClient()
154+
collections = hsclient.get_project(2222000).collections
155+
assert collections._allows_mpack(path) is (msgpack_available and expected_result)

tests/hubstorage/test_jobq.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@
22
Test JobQ
33
"""
44
import os, unittest
5+
56
import six
67
from six.moves import range
7-
from hubstorage.jobq import DuplicateJobError
8-
from hubstorage.utils import apipoll
8+
9+
from scrapinghub.hubstorage.jobq import DuplicateJobError
10+
from scrapinghub.hubstorage.utils import apipoll
11+
912
from .hstestcase import HSTestCase
1013

1114

tests/hubstorage/test_project.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,16 @@
22
Test Project
33
"""
44
import json
5+
from random import randint, random
6+
7+
import pytest
58
import six
69
from six.moves import range
7-
from random import randint, random
810
from requests.exceptions import HTTPError
9-
from hubstorage import HubstorageClient
10-
from hubstorage.utils import millitime
11+
12+
from scrapinghub import HubstorageClient
13+
from scrapinghub.hubstorage.utils import millitime
14+
1115
from .hstestcase import HSTestCase
1216
from .testutil import failing_downloader
1317

@@ -252,3 +256,27 @@ def test_output_string(self):
252256
job.close_writers()
253257
items = self.hsclient.get_job(job.key).items.iter_json()
254258
self.assertEqual(type(next(items)), str)
259+
260+
261+
@pytest.mark.parametrize('msgpack_available', [True, False])
262+
@pytest.mark.parametrize('path,expected_result', [
263+
(None, True),
264+
('33/1', True),
265+
('33/1/', True),
266+
((33, 1), True),
267+
('stats', False),
268+
('stats/', False),
269+
('33/1/stats', False),
270+
('33/1/stats/', False),
271+
((33, 1, 'stats'), False),
272+
])
273+
def test_allows_msgpack(monkeypatch, msgpack_available, path, expected_result):
274+
monkeypatch.setattr(
275+
'scrapinghub.hubstorage.resourcetype.MSGPACK_AVAILABLE', msgpack_available)
276+
hsclient = HubstorageClient()
277+
job = hsclient.get_job('2222000/1/1')
278+
for resource in [job.items, job.logs, job.samples]:
279+
assert resource._allows_mpack(path) is (msgpack_available and expected_result)
280+
assert job.requests._allows_mpack(path) is False
281+
assert job.metadata._allows_mpack(path) is False
282+
assert job.jobq._allows_mpack(path) is False

tests/hubstorage/test_retry.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
"""
22
Test Retry Policy
33
"""
4-
from six.moves.http_client import BadStatusLine
5-
from requests import HTTPError, ConnectionError
6-
from .hstestcase import HSTestCase
7-
from hubstorage import HubstorageClient
8-
import responses
94
import json
105
import re
116

7+
import responses
8+
from requests import HTTPError, ConnectionError
9+
from scrapinghub import HubstorageClient
10+
from six.moves.http_client import BadStatusLine
11+
12+
from .hstestcase import HSTestCase
13+
1214
GET = responses.GET
1315
POST = responses.POST
1416
DELETE = responses.DELETE

tests/hubstorage/test_system.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
import random
2-
from six.moves import range
32
from contextlib import closing
3+
4+
from six.moves import range
5+
6+
from scrapinghub import HubstorageClient
7+
from scrapinghub.hubstorage.utils import millitime
8+
49
from .hstestcase import HSTestCase
5-
from hubstorage import HubstorageClient
6-
from hubstorage.utils import millitime
710

811

912
class SystemTest(HSTestCase):

0 commit comments

Comments
 (0)