diff --git a/scrapinghub/hubstorage/collectionsrt.py b/scrapinghub/hubstorage/collectionsrt.py index 80418636..ce288bfb 100644 --- a/scrapinghub/hubstorage/collectionsrt.py +++ b/scrapinghub/hubstorage/collectionsrt.py @@ -1,13 +1,45 @@ import re + from requests.exceptions import HTTPError + from .resourcetype import DownloadableResource +from .serialization import MSGPACK_AVAILABLE from .utils import urlpathjoin +COLLECTIONS_MSGPACK_REGEX = re.compile( + r"""(v?c?s) # collection type + /\w+ # collection name + ( + /? # no key + | # OR + /(?P[^/]+)/? # item key + ) + $ + """, + re.VERBOSE) + + class Collections(DownloadableResource): resource_type = 'collections' + def _allows_mpack(self, path=None): + """Check if request can be served with msgpack data. + + Collection scan and get requests for keys are able to return msgpack data. + + :param path: None, tuple or string + + """ + if not MSGPACK_AVAILABLE: + return False + # path + path = urlpathjoin(path or '') + match = COLLECTIONS_MSGPACK_REGEX.match(path) + # count endpoint doesn't support msgpack + return bool(match and match.group('key') != 'count') + def get(self, _type, _name, _key=None, **params): try: r = self.apiget((_type, _name, _key), params=params) diff --git a/scrapinghub/hubstorage/resourcetype.py b/scrapinghub/hubstorage/resourcetype.py index bc81fdc7..061963a7 100644 --- a/scrapinghub/hubstorage/resourcetype.py +++ b/scrapinghub/hubstorage/resourcetype.py @@ -23,16 +23,22 @@ def __init__(self, client, key, auth=None): self.url = urlpathjoin(client.endpoint, self.key) def _allows_mpack(self, path=None): - """ Check if request can be served with msgpack data. + """Check if request can be served with msgpack data. - Currently, items, logs, collections and samples endpoints are able to + Currently, items, logs and samples endpoints are able to return msgpack data. However, /stats calls can only return JSON data for now. + + :param path: None, tuple or string + """ - if not MSGPACK_AVAILABLE or path == 'stats': + if not MSGPACK_AVAILABLE: return False - return self.resource_type in ('items', 'logs', - 'collections', 'samples') + path = urlpathjoin(path or '') + return ( + self.resource_type in ('items', 'logs', 'samples') and + not path.rstrip('/').endswith('stats') + ) @staticmethod def _enforce_msgpack(**kwargs): diff --git a/tests/hubstorage/hstestcase.py b/tests/hubstorage/hstestcase.py index a673c7c1..8b09c449 100644 --- a/tests/hubstorage/hstestcase.py +++ b/tests/hubstorage/hstestcase.py @@ -2,8 +2,9 @@ import unittest import random import requests -from hubstorage import HubstorageClient -from hubstorage.utils import urlpathjoin + +from scrapinghub import HubstorageClient +from scrapinghub.hubstorage.utils import urlpathjoin class HSTestCase(unittest.TestCase): diff --git a/tests/hubstorage/test_batchuploader.py b/tests/hubstorage/test_batchuploader.py index 77c51720..b42e6732 100644 --- a/tests/hubstorage/test_batchuploader.py +++ b/tests/hubstorage/test_batchuploader.py @@ -5,7 +5,8 @@ from six.moves import range from collections import defaultdict from .hstestcase import HSTestCase -from hubstorage import ValueTooLarge + +from scrapinghub.hubstorage import ValueTooLarge class BatchUploaderTest(HSTestCase): diff --git a/tests/hubstorage/test_client.py b/tests/hubstorage/test_client.py index df936e3a..f2b93071 100644 --- a/tests/hubstorage/test_client.py +++ b/tests/hubstorage/test_client.py @@ -2,8 +2,8 @@ Test Client """ from .hstestcase import HSTestCase -from hubstorage import HubstorageClient -from hubstorage.utils import apipoll +from scrapinghub import HubstorageClient +from scrapinghub.hubstorage.utils import apipoll class ClientTest(HSTestCase): diff --git a/tests/hubstorage/test_collections.py b/tests/hubstorage/test_collections.py index faba2c4c..50cb9454 100644 --- a/tests/hubstorage/test_collections.py +++ b/tests/hubstorage/test_collections.py @@ -2,8 +2,12 @@ Test Collections """ import random -from six.moves import range from contextlib import closing + +import pytest +from scrapinghub import HubstorageClient +from six.moves import range + from .hstestcase import HSTestCase from .testutil import failing_downloader @@ -121,3 +125,31 @@ def test_invalid_collection_name(self): self.assertRaises(ValueError, cols.new_store, '/foo') self.assertRaises(ValueError, cols.create_writer, 'invalidtype', 'n') self.assertRaises(ValueError, cols.create_writer, 's', 'foo-bar') + + +@pytest.mark.parametrize('msgpack_available', [True, False]) +@pytest.mark.parametrize('path,expected_result', [ + ('s/foo', True), + ('s/foo/', True), + (('s', 'foo'), True), + ('s/foo/bar', True), + ('s/foo/bar/', True), + (('s', 'foo', 'bar'), True), + ('vs/foo/bar/', True), + ('cs/foo/bar/', True), + ('vcs/foo/bar/', True), + ('s/foo/scan', True), + ('s/foo/bar/baz', False), + ('s/foo/count', False), + (('s', 'foo', 'count'), False), + ('x/foo', False), + (('x', 'foo'), False), + ('list', False), + (None, False), +]) +def test_allows_msgpack(monkeypatch, msgpack_available, path, expected_result): + monkeypatch.setattr( + 'scrapinghub.hubstorage.collectionsrt.MSGPACK_AVAILABLE', msgpack_available) + hsclient = HubstorageClient() + collections = hsclient.get_project(2222000).collections + assert collections._allows_mpack(path) is (msgpack_available and expected_result) diff --git a/tests/hubstorage/test_jobq.py b/tests/hubstorage/test_jobq.py index 4f1e2620..1d48a635 100644 --- a/tests/hubstorage/test_jobq.py +++ b/tests/hubstorage/test_jobq.py @@ -2,10 +2,13 @@ Test JobQ """ import os, unittest + import six from six.moves import range -from hubstorage.jobq import DuplicateJobError -from hubstorage.utils import apipoll + +from scrapinghub.hubstorage.jobq import DuplicateJobError +from scrapinghub.hubstorage.utils import apipoll + from .hstestcase import HSTestCase diff --git a/tests/hubstorage/test_project.py b/tests/hubstorage/test_project.py index 8d61c25c..5a0e79a4 100644 --- a/tests/hubstorage/test_project.py +++ b/tests/hubstorage/test_project.py @@ -2,12 +2,16 @@ Test Project """ import json +from random import randint, random + +import pytest import six from six.moves import range -from random import randint, random from requests.exceptions import HTTPError -from hubstorage import HubstorageClient -from hubstorage.utils import millitime + +from scrapinghub import HubstorageClient +from scrapinghub.hubstorage.utils import millitime + from .hstestcase import HSTestCase from .testutil import failing_downloader @@ -252,3 +256,27 @@ def test_output_string(self): job.close_writers() items = self.hsclient.get_job(job.key).items.iter_json() self.assertEqual(type(next(items)), str) + + +@pytest.mark.parametrize('msgpack_available', [True, False]) +@pytest.mark.parametrize('path,expected_result', [ + (None, True), + ('33/1', True), + ('33/1/', True), + ((33, 1), True), + ('stats', False), + ('stats/', False), + ('33/1/stats', False), + ('33/1/stats/', False), + ((33, 1, 'stats'), False), +]) +def test_allows_msgpack(monkeypatch, msgpack_available, path, expected_result): + monkeypatch.setattr( + 'scrapinghub.hubstorage.resourcetype.MSGPACK_AVAILABLE', msgpack_available) + hsclient = HubstorageClient() + job = hsclient.get_job('2222000/1/1') + for resource in [job.items, job.logs, job.samples]: + assert resource._allows_mpack(path) is (msgpack_available and expected_result) + assert job.requests._allows_mpack(path) is False + assert job.metadata._allows_mpack(path) is False + assert job.jobq._allows_mpack(path) is False diff --git a/tests/hubstorage/test_retry.py b/tests/hubstorage/test_retry.py index 91b7de45..873796a9 100644 --- a/tests/hubstorage/test_retry.py +++ b/tests/hubstorage/test_retry.py @@ -1,14 +1,16 @@ """ Test Retry Policy """ -from six.moves.http_client import BadStatusLine -from requests import HTTPError, ConnectionError -from .hstestcase import HSTestCase -from hubstorage import HubstorageClient -import responses import json import re +import responses +from requests import HTTPError, ConnectionError +from scrapinghub import HubstorageClient +from six.moves.http_client import BadStatusLine + +from .hstestcase import HSTestCase + GET = responses.GET POST = responses.POST DELETE = responses.DELETE diff --git a/tests/hubstorage/test_system.py b/tests/hubstorage/test_system.py index c6b6e8e5..fe475391 100644 --- a/tests/hubstorage/test_system.py +++ b/tests/hubstorage/test_system.py @@ -1,9 +1,12 @@ import random -from six.moves import range from contextlib import closing + +from six.moves import range + +from scrapinghub import HubstorageClient +from scrapinghub.hubstorage.utils import millitime + from .hstestcase import HSTestCase -from hubstorage import HubstorageClient -from hubstorage.utils import millitime class SystemTest(HSTestCase):