scrapinghub · vshlapakov · Oct 31, 2016 · Oct 18, 2016 · Oct 18, 2016 · Oct 18, 2016
diff --git a/requirements-test.txt b/requirements-test.txt
@@ -1,6 +1,7 @@
 -r requirements-pypy.txt
 
 mock
+vcrpy==1.10.3
 pytest
 pytest-cov
-responses==0.5.0
+responses==0.5.0
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+
+
+def pytest_addoption(parser):
+    """Register the command-line flags that control vcr cassette usage."""
+    cassette_flags = (
+        ("--update-cassettes",
+         "test with real services rewriting existing vcr cassettes"),
+        ("--ignore-cassettes",
+         "test with real services skipping existing vcr cassettes"),
+    )
+    # both flags are plain switches that are off unless given explicitly
+    for flag, description in cassette_flags:
+        parser.addoption(
+            flag, action="store_true", default=False, help=description)
diff --git a/tests/hubstorage/conftest.py b/tests/hubstorage/conftest.py
@@ -0,0 +1,185 @@
+import os
+import zlib
+import base64
+import pickle
+
+import vcr
+import pytest
+import shutil
+import requests
+from requests import HTTPError
+
+from scrapinghub import HubstorageClient
+from scrapinghub.hubstorage.utils import urlpathjoin
+
+
+# Names/ids of the hubstorage entities used by the tests
+TEST_PROJECT_ID = "2222222"
+TEST_SPIDER_NAME = 'hs-test-spider'
+TEST_FRONTIER_NAME = 'test'
+TEST_FRONTIER_SLOT = 'site.com'
+TEST_BOTGROUP = 'python-hubstorage-test'
+TEST_COLLECTION_NAME = "test_collection_123"
+# Auth key and endpoint are taken from the HS_AUTH / HS_ENDPOINT environment
+# variables when set; otherwise a key of 32 'f' characters and the default
+# URL below are used
+TEST_AUTH = os.getenv('HS_AUTH', 'f' * 32)
+TEST_ENDPOINT = os.getenv('HS_ENDPOINT', 'http://storage.vm.scrapinghub.com')
+
+# vcrpy creates the cassettes automatically under VCR_CASSETES_DIR
+VCR_CASSETES_DIR = 'tests/hubstorage/cassetes'
+
+
+class VCRGzipSerializer(object):
+    """Custom VCR.py serializer: pickle + zlib compression + base64 text.
+
+    NOTE: deserialize() unpickles the cassette content, so it must only be
+    fed cassettes coming from a trusted source.
+    """
+
+    def serialize(self, cassette_dict):
+        """Convert the cassette dict into a string.
+
+        Some requests may contain binary data, which rules out json as
+        the string format, hence the pickle (protocol 2) representation.
+        """
+        pickled = pickle.dumps(cassette_dict, protocol=2)
+        encoded = base64.b64encode(zlib.compress(pickled))
+        return encoded.decode('utf8')
+
+    def deserialize(self, cassette_string):
+        """Convert a string produced by serialize() back into a dict."""
+        encoded = cassette_string.encode('utf8')
+        pickled = zlib.decompress(base64.b64decode(encoded))
+        return pickle.loads(pickled)
+
+
+# Shared VCR instance of the hubstorage tests: cassettes are kept under
+# VCR_CASSETES_DIR and are stored using the custom 'gz' serializer
+my_vcr = vcr.VCR(cassette_library_dir=VCR_CASSETES_DIR, record_mode='once')
+my_vcr.register_serializer('gz', VCRGzipSerializer())
+my_vcr.serializer = 'gz'
+
+
+def pytest_configure(config):
+    """Adjust the vcr setup according to the cassette command-line flags.
+
+    --update-cassettes takes precedence over --ignore-cassettes.
+    """
+    options = config.option
+    if options.update_cassettes:
+        # vcr has an `all` mode to update cassettes, but it never deletes
+        # or clears existing records, so a cassette would only ever grow;
+        # dropping the whole directory is used instead
+        if os.path.exists(VCR_CASSETES_DIR):
+            shutil.rmtree(VCR_CASSETES_DIR)
+        return
+    if not options.ignore_cassettes:
+        return
+    # simple hack to just ignore vcr cassettes:
+    # - `all` record mode means recording new interactions + no replay
+    # - a before_record_request hook returning None skips all the requests
+    my_vcr.record_mode = 'all'
+    my_vcr.before_record_request = lambda request: None
+
+
+def is_using_real_services(request):
+    """Tell whether the run was asked to update or to ignore cassettes."""
+    options = request.config.option
+    return options.update_cassettes or options.ignore_cassettes
+
+
+@pytest.fixture(scope='session')
+def hsclient():
+    """Session-wide client created with TEST_AUTH and TEST_ENDPOINT."""
+    client = HubstorageClient(auth=TEST_AUTH, endpoint=TEST_ENDPOINT)
+    return client
+
+
+@pytest.fixture(scope='session')
+def hsproject(hsclient):
+    """Session-wide project object fetched for TEST_PROJECT_ID."""
+    project = hsclient.get_project(TEST_PROJECT_ID)
+    return project
+
+
+# NOTE: pytest.fixture must stay the outermost decorator: this way pytest
+# registers the vcr-wrapped function, instead of vcr wrapping whatever
+# object the fixture decorator happens to return.
+@pytest.fixture(scope='session')
+@my_vcr.use_cassette()
+def hsspiderid(hsproject):
+    """Session-wide id of the TEST_SPIDER_NAME spider, as a string.
+
+    The id is requested with create=1 and the call is made while the vcr
+    cassette of this fixture is in use.
+    """
+    return str(hsproject.ids.spider(TEST_SPIDER_NAME, create=1))
+
+
+@pytest.fixture(scope='session')
+def hscollection(hsproject, request):
+    """Session-wide test collection, emptied first when on real services."""
+    store = get_test_collection(hsproject)
+    real_services = is_using_real_services(request)
+    if real_services:
+        clean_collection(store)
+    yield store
+
+
+@pytest.fixture(autouse=True, scope='session')
+def setup_session(hsclient, hsproject, hscollection, request):
+    """Prepare the test session and close the client when it is over.
+
+    When real services are used, the test botgroup is set and the project
+    is cleaned before anything else runs.
+    """
+    real_services = is_using_real_services(request)
+    if real_services:
+        for prepare in (set_testbotgroup, remove_all_jobs):
+            prepare(hsproject)
+    yield
+    hsclient.close()
+
+
+@pytest.fixture(autouse=True)
+def setup_vcrpy(request, hsproject):
+    """Run every test inside its own vcr cassette.
+
+    Cassettes are named like "test_module/test_function.gz". Without an
+    explicit name the current function name (setup_vcrpy) would be used
+    for all tests; the other option is to add the vcr decorator to each
+    test separately.
+    """
+    test_function = request.function
+    module_name = test_function.__module__.split('.')[-1]
+    cassette_name = '{}/{}.gz'.format(module_name, test_function.__name__)
+    # with real services each test starts from a cleaned project
+    if is_using_real_services(request):
+        remove_all_jobs(hsproject)
+    with my_vcr.use_cassette(cassette_name):
+        yield
+
+
+# ----------------------------------------------------------------------------
+
+
+def start_job(hsproject, **startparams):
+    """Start a job through the project's JobQ and return its job object.
+
+    `startparams` are passed to jobq.start() as is. Returns None when
+    jobq.start() gives back nothing.
+    """
+    jobdata = hsproject.jobq.start(**startparams)
+    if not jobdata:
+        return None
+    # the key is taken out of the data, the rest is passed on as metadata
+    jobkey = jobdata.pop('key')
+    credentials = (jobkey, jobdata['auth'])
+    return hsproject.get_job(jobkey, jobauth=credentials, metadata=jobdata)
+
+
+# Clean environment section
+
+
+def remove_all_jobs(hsproject):
+    """Reset project settings (except botgroups) and remove all JobQ jobs."""
+    settings = hsproject.settings
+    stale_keys = [key for key in list(settings.keys()) if key != 'botgroups']
+    for key in stale_keys:
+        del settings[key]
+    settings.save()
+
+    # Cleanup JobQ: two passes over the queues to ensure all jobs are covered
+    for _ in range(2):
+        for queuename in ('pending', 'running', 'finished'):
+            queue_info = hsproject.jobq.summary(queuename)
+            for job_summary in queue_info['summary']:
+                _remove_job(hsproject, job_summary['key'])
+
+
+def _remove_job(hsproject, jobkey):
+    """Finish and delete the job in JobQ, then delete the job itself.
+
+    `jobkey` must be prefixed with TEST_PROJECT_ID followed by a slash;
+    the remainder of the key is what gets passed to jobs.apidelete().
+    Raises AssertionError, before anything is modified, for a key that
+    belongs to another project.
+    """
+    # validate first: nothing must be finished or deleted for a foreign key,
+    # and a mere string prefix match (e.g. "22222223/...") is not enough
+    project_id, _, job_path = jobkey.partition('/')
+    assert project_id == TEST_PROJECT_ID, jobkey
+    hsproject.jobq.finish(jobkey)
+    hsproject.jobq.delete(jobkey)
+    # delete job
+    hsproject.jobs.apidelete(job_path)
+
+# Collection helpers section
+
+
+def get_test_collection(project):
+    """Return the project's store for TEST_COLLECTION_NAME."""
+    collections = project.collections
+    return collections.new_store(TEST_COLLECTION_NAME)
+
+
+def clean_collection(collection):
+    """Delete every item of the collection, one by one, by its `_key`.
+
+    A 404 response is tolerated; any other HTTPError is re-raised.
+    """
+    try:
+        for entry in collection.iter_values():
+            collection.delete(entry['_key'])
+    except HTTPError as exc:
+        # the service responds 404 if the collection doesn't exist yet
+        if exc.response.status_code == 404:
+            return
+        raise
+
+
+# Botgroups helpers section
+
+
+def set_testbotgroup(hsproject):
+    """Set TEST_BOTGROUP as the only botgroup of the project."""
+    botgroups = [TEST_BOTGROUP]
+    hsproject.settings.apipost(jl={'botgroups': botgroups})
+    # JobQ's botgroups table has to be populated with an additional request
+    max_running_url = urlpathjoin(
+        TEST_ENDPOINT, 'botgroups', TEST_BOTGROUP, 'max_running')
+    requests.post(max_running_url, auth=hsproject.auth, data='null')
+    hsproject.settings.expire()
+
+
+def unset_testbotgroup(hsproject):
+    """Remove the botgroups setting of the project and the JobQ botgroup."""
+    settings = hsproject.settings
+    settings.apidelete('botgroups')
+    settings.expire()
+    # the botgroup has to be deleted in JobQ with an additional request
+    botgroup_url = urlpathjoin(TEST_ENDPOINT, 'botgroups', TEST_BOTGROUP)
+    requests.delete(botgroup_url, auth=hsproject.auth)
diff --git a/tests/hubstorage/hstestcase.py b/tests/hubstorage/hstestcase.py
diff --git a/tests/hubstorage/test_activity.py b/tests/hubstorage/test_activity.py
@@ -1,34 +1,33 @@
 """
 Test Activty
 """
-from .hstestcase import HSTestCase
 from six.moves import range
 
 
-class ActivityTest(HSTestCase):
+def test_post_and_reverse_get(hsproject):
+    # make some sample data
+    orig_data = [{u'foo': 42, u'counter': i} for i in range(20)]
+    data1 = orig_data[:10]
+    data2 = orig_data[10:]
 
-    def test_post_and_reverse_get(self):
-        # make some sample data
-        orig_data = [{u'foo': 42, u'counter': i} for i in range(20)]
-        data1 = orig_data[:10]
-        data2 = orig_data[10:]
+    # put ordered data in 2 separate posts
+    hsproject.activity.post(data1)
+    hsproject.activity.post(data2)
 
-        # put ordered data in 2 separate posts
-        self.project.activity.post(data1)
-        self.project.activity.post(data2)
+    # read them back in reverse chronological order
+    result = list(hsproject.activity.list(count=20))
+    assert len(result) == 20
+    assert orig_data[::-1] == result
 
-        # read them back in reverse chronological order
-        result = list(self.project.activity.list(count=20))
-        self.assertEqual(len(result), 20)
-        self.assertEqual(orig_data[::-1], result)
 
-    def test_filters(self):
-        self.project.activity.post({'c': i} for i in range(10))
-        r = list(self.project.activity.list(filter='["c", ">", [5]]', count=2))
-        self.assertEqual(r, [{'c': 9}, {'c': 8}])
+def test_filters(hsproject):
+    """A filtered listing limited by count returns the expected entries."""
+    activity = hsproject.activity
+    activity.post({'c': i} for i in range(10))
+    matches = activity.list(filter='["c", ">", [5]]', count=2)
+    assert list(matches) == [{'c': 9}, {'c': 8}]
 
-    def test_timestamp(self):
-        self.project.activity.add({'foo': 'bar'}, baz='qux')
-        entry = next(self.project.activity.list(count=1, meta='_ts'))
-        self.assertTrue(entry.pop('_ts', None))
-        self.assertEqual(entry, {'foo': 'bar', 'baz': 'qux'})
+
+def test_timestamp(hsproject):
+    """An added entry is listed with a truthy `_ts` meta field."""
+    activity = hsproject.activity
+    activity.add({'foo': 'bar'}, baz='qux')
+    latest = next(activity.list(count=1, meta='_ts'))
+    timestamp = latest.pop('_ts', None)
+    assert timestamp
+    assert latest == {'foo': 'bar', 'baz': 'qux'}