Skip to content

Pytest and vcrpy to improve sh.hubstorage tests #32

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Oct 31, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
-r requirements-pypy.txt

mock
vcrpy==1.10.3
pytest
pytest-cov
responses==0.5.0
responses==0.5.0
10 changes: 10 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-


def pytest_addoption(parser):
    """Register the command-line switches that control VCR cassette usage.

    Both options are ``store_true`` flags defaulting to ``False``:

    * ``--update-cassettes``: run against real services and rewrite the
      existing cassettes.
    * ``--ignore-cassettes``: run against real services and skip the
      existing cassettes.
    """
    parser.addoption(
        "--update-cassettes", action="store_true", default=False,
        help="test with real services rewriting existing vcr cassettes")
    parser.addoption(
        "--ignore-cassettes", action="store_true", default=False,
        help="test with real services skipping existing vcr cassettes")
185 changes: 185 additions & 0 deletions tests/hubstorage/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
import os
import zlib
import base64
import pickle

import vcr
import pytest
import shutil
import requests
from requests import HTTPError

from scrapinghub import HubstorageClient
from scrapinghub.hubstorage.utils import urlpathjoin


# Fixed identifiers of the project, spider, frontier, bot group and
# collection that the hubstorage tests operate on.
TEST_PROJECT_ID = "2222222"
TEST_SPIDER_NAME = 'hs-test-spider'
TEST_FRONTIER_NAME = 'test'
TEST_FRONTIER_SLOT = 'site.com'
TEST_BOTGROUP = 'python-hubstorage-test'
TEST_COLLECTION_NAME = "test_collection_123"
# Credentials and endpoint can be overridden through the HS_AUTH and
# HS_ENDPOINT environment variables; otherwise the defaults below are used.
TEST_AUTH = os.getenv('HS_AUTH', 'f' * 32)
TEST_ENDPOINT = os.getenv('HS_ENDPOINT', 'http://storage.vm.scrapinghub.com')

# vcrpy creates the cassettes automatically under VCR_CASSETES_DIR
VCR_CASSETES_DIR = 'tests/hubstorage/cassetes'


class VCRGzipSerializer(object):
    """Custom compressed serializer for VCR.py.

    Despite the name, no gzip/ZIP container is produced: a cassette dict is
    pickled (protocol 2), compressed with zlib and then base64-encoded so
    that the result is a plain text string.
    """

    def serialize(self, cassette_dict):
        """Return ``cassette_dict`` encoded as a base64 text string."""
        # There can be binary data inside some of the requests, so it is
        # impossible to use json for serialization to string.
        pickled = pickle.dumps(cassette_dict, protocol=2)
        compressed = zlib.compress(pickled)
        return base64.b64encode(compressed).decode('utf8')

    def deserialize(self, cassette_string):
        """Return the cassette dict encoded in ``cassette_string``."""
        decoded = base64.b64decode(cassette_string.encode('utf8'))
        # SECURITY NOTE: pickle.loads can execute arbitrary code when fed
        # crafted data, so only cassettes from a trusted source must ever
        # be loaded through this serializer.
        return pickle.loads(zlib.decompress(decoded))


# Shared VCR instance used by the fixtures in this module: cassettes are
# looked up under VCR_CASSETES_DIR and stored through the serializer
# registered below under the name 'gz'.
my_vcr = vcr.VCR(cassette_library_dir=VCR_CASSETES_DIR, record_mode='once')
my_vcr.register_serializer('gz', VCRGzipSerializer())
my_vcr.serializer = 'gz'


def pytest_configure(config):
    """Adjust cassette handling according to the command-line options.

    With ``--update-cassettes`` the whole cassette directory is removed so
    that fresh cassettes get recorded; with ``--ignore-cassettes`` the shared
    ``my_vcr`` instance is reconfigured so that cassettes are bypassed.
    When neither option is set nothing is changed.
    """
    if config.option.update_cassettes:
        # there's vcr `all` mode to update cassettes but it doesn't delete
        # or clear existing records, so its size will always only grow
        if os.path.exists(VCR_CASSETES_DIR):
            shutil.rmtree(VCR_CASSETES_DIR)
    elif config.option.ignore_cassettes:
        # simple hack to just ignore vcr cassettes:
        # - all record_mode means recording new interactions + no replay
        # - before_record returning None means skipping all the requests
        # (my_vcr is only mutated here, never rebound, so no `global`
        # declaration is required)
        my_vcr.record_mode = 'all'
        my_vcr.before_record_request = lambda request: None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍



def is_using_real_services(request):
    """Tell whether the test run talks to real services.

    That is the case when either the ``update_cassettes`` or the
    ``ignore_cassettes`` option is set on ``request.config.option``.
    """
    options = request.config.option
    return options.update_cassettes or options.ignore_cassettes


@pytest.fixture(scope='session')
def hsclient():
    """Session-wide HubstorageClient built from TEST_AUTH and TEST_ENDPOINT."""
    return HubstorageClient(auth=TEST_AUTH, endpoint=TEST_ENDPOINT)


@pytest.fixture(scope='session')
def hsproject(hsclient):
    """Session-wide project object for TEST_PROJECT_ID, taken from hsclient."""
    return hsclient.get_project(TEST_PROJECT_ID)


# Decorator order matters: the cassette decorator must wrap the plain
# function and ``pytest.fixture`` must be applied last (outermost).
# Stacked the other way round, vcr wraps whatever object ``pytest.fixture``
# returns, and pytest may then register the undecorated function, in which
# case the fixture would run without its cassette.
@pytest.fixture(scope='session')
@my_vcr.use_cassette()
def hsspiderid(hsproject):
    """Session-wide id of the test spider, as a string.

    The id is looked up by TEST_SPIDER_NAME with ``create=1``.
    """
    return str(hsproject.ids.spider(TEST_SPIDER_NAME, create=1))
Copy link
Contributor

@chekunkov chekunkov Oct 28, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, after some thinking - let's keep it, minor thing and we cannot guarantee the same spider id in dev environment



@pytest.fixture(scope='session')
def hscollection(hsproject, request):
    """Session-wide test collection store.

    The collection is emptied first, but only when the run talks to real
    services; with recorded cassettes no cleanup is performed.
    """
    collection = get_test_collection(hsproject)
    if is_using_real_services(request):
        clean_collection(collection)
    yield collection


@pytest.fixture(autouse=True, scope='session')
def setup_session(hsclient, hsproject, hscollection, request):
    """Prepare the test environment once per session and close the client.

    When running against real services the test bot group is configured and
    all existing jobs are removed before the tests start. The client is
    closed after the session, in every mode.
    """
    if is_using_real_services(request):
        set_testbotgroup(hsproject)
        remove_all_jobs(hsproject)
    yield
    hsclient.close()


@pytest.fixture(autouse=True)
def setup_vcrpy(request, hsproject):
# generates names like "test_module/test_function.gz"
# otherwise it uses current function name (setup_vcrpy) for all tests
# other option is to add vcr decorator to each test separately
cassette_name = '{}/{}.gz'.format(
request.function.__module__.split('.')[-1],
request.function.__name__
)
if is_using_real_services(request):
remove_all_jobs(hsproject)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can reduce size of the cassettes even more by excluding this setup if we are using cassettes? It's needed only when you run tests against live servers and brings no value while using requests recorded in the cassettes. E.g. we can do this cleanup only if config.option.update_cassettes:

with my_vcr.use_cassette(cassette_name):
yield


# ----------------------------------------------------------------------------


def start_job(hsproject, **startparams):
    """Start the next job from the project's job queue.

    ``startparams`` are forwarded to ``hsproject.jobq.start``. When that call
    yields job data, the matching job object is returned, built from the job
    key, its ``(key, auth)`` pair and the remaining data as metadata.
    Returns ``None`` when there was nothing to start.
    """
    jobdata = hsproject.jobq.start(**startparams)
    if not jobdata:
        return None
    # 'key' is removed from the data; everything else is passed as metadata.
    jobkey = jobdata.pop('key')
    jobauth = (jobkey, jobdata['auth'])
    return hsproject.get_job(jobkey, jobauth=jobauth, metadata=jobdata)


# Clean environment section


def remove_all_jobs(hsproject):
    """Reset project settings and remove every job of the project.

    All settings except ``'botgroups'`` are deleted and the settings saved;
    afterwards every job listed in the pending, running and finished queues
    is removed.
    """
    # Copy the keys first: entries are deleted while looping.
    for k in list(hsproject.settings.keys()):
        if k != 'botgroups':
            del hsproject.settings[k]
    hsproject.settings.save()

    # Cleanup JobQ: run 2 times to ensure we covered all jobs
    for queuename in ('pending', 'running', 'finished') * 2:
        info = hsproject.jobq.summary(queuename)
        for summary in info['summary']:
            _remove_job(hsproject, summary['key'])


def _remove_job(hsproject, jobkey):
    """Finish and delete the job ``jobkey`` in the job queue, then delete it.

    Raises ``AssertionError`` (carrying the offending key) when ``jobkey``
    does not start with TEST_PROJECT_ID.
    """
    # Safety guard first: refuse to touch a job outside the test project
    # before any destructive call has been issued.
    assert jobkey.startswith(TEST_PROJECT_ID), jobkey
    hsproject.jobq.finish(jobkey)
    hsproject.jobq.delete(jobkey)
    # delete job; only the part of the key after the first '/' is passed
    hsproject.jobs.apidelete(jobkey.partition('/')[2])

# Collection helpers section


def get_test_collection(project):
    """Return the store named TEST_COLLECTION_NAME from ``project.collections``."""
    return project.collections.new_store(TEST_COLLECTION_NAME)


def clean_collection(collection):
    """Delete every item of ``collection``, one ``_key`` at a time.

    An ``HTTPError`` with status 404 is swallowed; any other ``HTTPError``
    is re-raised.
    """
    try:
        for item in collection.iter_values():
            collection.delete(item['_key'])
    except HTTPError as e:
        # if collection doesn't exist yet service responds 404
        if e.response.status_code != 404:
            raise


# Botgroups helpers section


def set_testbotgroup(hsproject):
    """Assign TEST_BOTGROUP to the project and register it with JobQ.

    The project's ``botgroups`` setting is posted first, then a ``null``
    ``max_running`` value is posted for the bot group, and finally the
    project settings are expired.
    """
    hsproject.settings.apipost(jl={'botgroups': [TEST_BOTGROUP]})
    # Additional step to populate JobQ's botgroups table
    url = urlpathjoin(TEST_ENDPOINT, 'botgroups', TEST_BOTGROUP, 'max_running')
    requests.post(url, auth=hsproject.auth, data='null')
    hsproject.settings.expire()


def unset_testbotgroup(hsproject):
    """Remove the bot group configuration made by the tests.

    The project's ``botgroups`` setting is deleted and the settings are
    expired; then TEST_BOTGROUP itself is deleted through the botgroups
    endpoint.
    """
    hsproject.settings.apidelete('botgroups')
    hsproject.settings.expire()
    # Additional step to delete botgroups in JobQ
    url = urlpathjoin(TEST_ENDPOINT, 'botgroups', TEST_BOTGROUP)
    requests.delete(url, auth=hsproject.auth)
102 changes: 0 additions & 102 deletions tests/hubstorage/hstestcase.py

This file was deleted.

45 changes: 22 additions & 23 deletions tests/hubstorage/test_activity.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,33 @@
"""
Test Activity
"""
from .hstestcase import HSTestCase
from six.moves import range


class ActivityTest(HSTestCase):
def test_post_and_reverse_get(hsproject):
# make some sample data
orig_data = [{u'foo': 42, u'counter': i} for i in range(20)]
data1 = orig_data[:10]
data2 = orig_data[10:]

def test_post_and_reverse_get(self):
# make some sample data
orig_data = [{u'foo': 42, u'counter': i} for i in range(20)]
data1 = orig_data[:10]
data2 = orig_data[10:]
# put ordered data in 2 separate posts
hsproject.activity.post(data1)
hsproject.activity.post(data2)

# put ordered data in 2 separate posts
self.project.activity.post(data1)
self.project.activity.post(data2)
# read them back in reverse chronological order
result = list(hsproject.activity.list(count=20))
assert len(result) == 20
assert orig_data[::-1] == result

# read them back in reverse chronological order
result = list(self.project.activity.list(count=20))
self.assertEqual(len(result), 20)
self.assertEqual(orig_data[::-1], result)

def test_filters(self):
self.project.activity.post({'c': i} for i in range(10))
r = list(self.project.activity.list(filter='["c", ">", [5]]', count=2))
self.assertEqual(r, [{'c': 9}, {'c': 8}])
def test_filters(hsproject):
hsproject.activity.post({'c': i} for i in range(10))
r = list(hsproject.activity.list(filter='["c", ">", [5]]', count=2))
assert r == [{'c': 9}, {'c': 8}]

def test_timestamp(self):
self.project.activity.add({'foo': 'bar'}, baz='qux')
entry = next(self.project.activity.list(count=1, meta='_ts'))
self.assertTrue(entry.pop('_ts', None))
self.assertEqual(entry, {'foo': 'bar', 'baz': 'qux'})

def test_timestamp(hsproject):
hsproject.activity.add({'foo': 'bar'}, baz='qux')
entry = next(hsproject.activity.list(count=1, meta='_ts'))
assert entry.pop('_ts', None)
assert entry == {'foo': 'bar', 'baz': 'qux'}
Loading