-
Notifications
You must be signed in to change notification settings - Fork 60
Pytest and vcrpy to improve sh.hubstorage tests #32
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
14510ec
e23ebf0
856ddfb
6367cc8
4541ecd
0d76f36
b1e5239
bbd4687
63f0b8c
0602bba
332454f
4a842dc
5f3bebe
9e29f62
d6d98d2
fc28d8a
59fe99c
0d19b04
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
-r requirements-pypy.txt | ||
|
||
mock | ||
vcrpy==1.10.3 | ||
pytest | ||
pytest-cov | ||
responses==0.5.0 | ||
responses==0.5.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
|
||
def pytest_addoption(parser):
    """Register the command-line flags that control vcr cassette usage.

    Both flags are plain booleans (``store_true``) that default to False.
    """
    cassette_flags = (
        ("--update-cassettes",
         "test with real services rewriting existing vcr cassettes"),
        ("--ignore-cassettes",
         "test with real services skipping existing vcr cassettes"),
    )
    for flag, help_text in cassette_flags:
        parser.addoption(
            flag, action="store_true", default=False, help=help_text)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
import os | ||
import zlib | ||
import base64 | ||
import pickle | ||
|
||
import vcr | ||
import pytest | ||
import shutil | ||
import requests | ||
from requests import HTTPError | ||
|
||
from scrapinghub import HubstorageClient | ||
from scrapinghub.hubstorage.utils import urlpathjoin | ||
|
||
|
||
# Names/identifiers of the entities the hubstorage tests work with.
TEST_PROJECT_ID = '2222222'
TEST_SPIDER_NAME = 'hs-test-spider'
TEST_FRONTIER_NAME = 'test'
TEST_FRONTIER_SLOT = 'site.com'
TEST_BOTGROUP = 'python-hubstorage-test'
TEST_COLLECTION_NAME = 'test_collection_123'

# Credentials and endpoint may be overridden via the HS_AUTH and
# HS_ENDPOINT environment variables.
TEST_AUTH = os.environ.get('HS_AUTH', 'f' * 32)
TEST_ENDPOINT = os.environ.get(
    'HS_ENDPOINT', 'http://storage.vm.scrapinghub.com')

# vcrpy creates the cassettes automatically under VCR_CASSETES_DIR
# (the constant name and the directory path are intentionally left
# spelled exactly as they are used elsewhere).
VCR_CASSETES_DIR = 'tests/hubstorage/cassetes'
|
||
|
||
class VCRGzipSerializer(object):
    """Custom compressed serializer for VCR.py.

    A cassette dict is pickled (protocol 2), compressed with zlib and
    then base64-encoded so the result is a text string.
    """

    def serialize(self, cassette_dict):
        """Turn a cassette dict into a base64 text string."""
        # Some requests can carry binary data, which rules out json
        # as the dict -> string step; pickle is used instead.
        pickled = pickle.dumps(cassette_dict, protocol=2)
        packed = base64.b64encode(zlib.compress(pickled))
        return packed.decode('utf8')

    def deserialize(self, cassette_string):
        """Turn a string produced by ``serialize`` back into a dict."""
        packed = base64.b64decode(cassette_string.encode('utf8'))
        pickled = zlib.decompress(packed)
        # NOTE(review): pickle.loads executes whatever the payload
        # describes; only load cassettes that come from a trusted source.
        return pickle.loads(pickled)
|
||
|
||
# Shared recorder used by the fixtures in this module: cassettes are
# looked up under VCR_CASSETES_DIR and the record mode starts as 'once'
# (pytest_configure may switch it to 'all').
my_vcr = vcr.VCR(
    record_mode='once',
    cassette_library_dir=VCR_CASSETES_DIR,
)
# Register VCRGzipSerializer under the name 'gz' and make it the
# serializer the recorder uses for its cassettes.
my_vcr.register_serializer('gz', VCRGzipSerializer())
my_vcr.serializer = 'gz'
|
||
|
||
def pytest_configure(config):
    """Apply the cassette command-line flags before tests run.

    ``--update-cassettes`` removes the cassette directory if it exists;
    otherwise ``--ignore-cassettes`` reconfigures ``my_vcr`` so that
    cassettes are ignored. ``--update-cassettes`` wins when both are set.
    """
    options = config.option

    if options.update_cassettes:
        # vcr has an `all` mode to update cassettes, but it doesn't delete
        # or clear existing records, so a cassette would only ever grow;
        # removing the whole directory gives a clean re-recording instead.
        if os.path.exists(VCR_CASSETES_DIR):
            shutil.rmtree(VCR_CASSETES_DIR)
        return

    if not options.ignore_cassettes:
        return

    # Simple hack to just ignore vcr cassettes:
    # - `all` record mode means recording new interactions + no replay
    # - a before_record_request hook returning None means skipping
    #   all the requests
    my_vcr.record_mode = 'all'
    my_vcr.before_record_request = lambda request: None
|
||
|
||
def is_using_real_services(request):
    """Tell whether the run was started with a flag that hits real services.

    Returns the value of ``update_cassettes`` if it is truthy, otherwise
    the value of ``ignore_cassettes`` (both read from the pytest options).
    """
    options = request.config.option
    return options.update_cassettes or options.ignore_cassettes
|
||
|
||
@pytest.fixture(scope='session')
def hsclient():
    """Session-scoped HubstorageClient built from TEST_AUTH/TEST_ENDPOINT."""
    client = HubstorageClient(auth=TEST_AUTH, endpoint=TEST_ENDPOINT)
    return client
|
||
|
||
@pytest.fixture(scope='session')
def hsproject(hsclient):
    """Session-scoped project object for TEST_PROJECT_ID."""
    project = hsclient.get_project(TEST_PROJECT_ID)
    return project
|
||
|
||
@my_vcr.use_cassette()
@pytest.fixture(scope='session')
def hsspiderid(hsproject):
    """Session-scoped id (as a string) of the spider TEST_SPIDER_NAME.

    The lookup is done with ``create=1`` and goes through its own vcr
    cassette.
    """
    spider_id = hsproject.ids.spider(TEST_SPIDER_NAME, create=1)
    return str(spider_id)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, after some thinking, let's keep it: it's a minor thing, and we cannot guarantee the same spider id in the dev environment. |
||
|
||
|
||
@pytest.fixture(scope='session')
def hscollection(hsproject, request):
    """Session-scoped test collection.

    The collection is emptied first, but only when the run talks to
    real services.
    """
    store = get_test_collection(hsproject)
    live_run = is_using_real_services(request)
    if live_run:
        clean_collection(store)
    yield store
|
||
|
||
@pytest.fixture(autouse=True, scope='session')
def setup_session(hsclient, hsproject, hscollection, request):
    """Session-wide setup/teardown applied automatically to every test.

    When running against real services the test botgroup is set and the
    project is cleaned up front; the client is closed once the session
    is over.
    """
    live_run = is_using_real_services(request)
    if live_run:
        set_testbotgroup(hsproject)
        remove_all_jobs(hsproject)
    yield
    # teardown part: executed after the last test of the session
    hsclient.close()
|
||
|
||
@pytest.fixture(autouse=True) | ||
def setup_vcrpy(request, hsproject): | ||
# generates names like "test_module/test_function.gz" | ||
# otherwise it uses current function name (setup_vcrpy) for all tests | ||
# other option is to add vcr decorator to each test separately | ||
cassette_name = '{}/{}.gz'.format( | ||
request.function.__module__.split('.')[-1], | ||
request.function.__name__ | ||
) | ||
if is_using_real_services(request): | ||
remove_all_jobs(hsproject) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe we can reduce the size of the cassettes even more by excluding this setup when we are using cassettes? It's needed only when you run tests against live servers and brings no value when replaying requests recorded in the cassettes. E.g. we can do this cleanup only when real services are used. |
||
with my_vcr.use_cassette(cassette_name): | ||
yield | ||
|
||
|
||
# ---------------------------------------------------------------------------- | ||
|
||
|
||
def start_job(hsproject, **startparams):
    """Start the next job from the project's JobQ and return its job object.

    ``startparams`` are forwarded to ``hsproject.jobq.start``. Returns
    None when that call yields nothing truthy.
    """
    jobdata = hsproject.jobq.start(**startparams)
    if not jobdata:
        return None
    # 'key' is removed from the data that is passed on as metadata,
    # 'auth' stays in it.
    jobkey = jobdata.pop('key')
    credentials = (jobkey, jobdata['auth'])
    return hsproject.get_job(jobkey, jobauth=credentials, metadata=jobdata)
|
||
|
||
# Clean environment section | ||
|
||
|
||
def remove_all_jobs(hsproject):
    """Reset the project: drop its settings and remove every queued job.

    All project settings except 'botgroups' are deleted and saved, then
    every job listed in the pending/running/finished queue summaries is
    removed.
    """
    settings = hsproject.settings
    stale_names = [name for name in list(settings.keys())
                   if name != 'botgroups']
    for name in stale_names:
        del settings[name]
    settings.save()

    # Cleanup JobQ: run 2 times to ensure we covered all jobs
    for _ in range(2):
        for queuename in ('pending', 'running', 'finished'):
            queue_info = hsproject.jobq.summary(queuename)
            for entry in queue_info['summary']:
                _remove_job(hsproject, entry['key'])
|
||
|
||
def _remove_job(hsproject, jobkey):
    """Finish a job, remove it from the JobQ and delete the job itself.

    :param hsproject: project object the job is removed from
    :param jobkey: job key string; must start with TEST_PROJECT_ID
    :raises AssertionError: if ``jobkey`` does not start with
        TEST_PROJECT_ID; in that case nothing has been modified
    """
    # Safety check first: refuse keys outside the test project *before*
    # any destructive call is made (previously the check ran only after
    # the job had already been finished and deleted from the JobQ).
    assert jobkey.startswith(TEST_PROJECT_ID), jobkey
    hsproject.jobq.finish(jobkey)
    hsproject.jobq.delete(jobkey)
    # delete job: only the part of the key after the first '/' is passed
    # (presumably the key without its project id prefix -- TODO confirm)
    hsproject.jobs.apidelete(jobkey.partition('/')[2])
|
||
# Collection helpers section | ||
|
||
|
||
def get_test_collection(project):
    """Return the store named TEST_COLLECTION_NAME of the given project."""
    collections = project.collections
    return collections.new_store(TEST_COLLECTION_NAME)
|
||
|
||
def clean_collection(collection):
    """Delete every item of the collection, one ``_key`` at a time.

    An HTTP 404 is tolerated; any other HTTPError is re-raised.
    """
    try:
        for item in collection.iter_values():
            collection.delete(item['_key'])
    except HTTPError as exc:
        # if collection doesn't exist yet service responds 404
        if exc.response.status_code == 404:
            return
        raise
|
||
|
||
# Botgroups helpers section | ||
|
||
|
||
def set_testbotgroup(hsproject):
    """Assign TEST_BOTGROUP to the project and register it in the JobQ."""
    settings = hsproject.settings
    settings.apipost(jl={'botgroups': [TEST_BOTGROUP]})
    # Additional step to populate JobQ's botgroups table
    max_running_url = urlpathjoin(
        TEST_ENDPOINT, 'botgroups', TEST_BOTGROUP, 'max_running')
    requests.post(max_running_url, auth=hsproject.auth, data='null')
    # expire the settings so the posted value is not shadowed by a
    # stale local copy (presumably a cache -- TODO confirm)
    settings.expire()
|
||
|
||
def unset_testbotgroup(hsproject):
    """Remove the botgroups setting and delete TEST_BOTGROUP in the JobQ."""
    settings = hsproject.settings
    settings.apidelete('botgroups')
    settings.expire()
    # Additional step to delete botgroups in JobQ
    botgroup_url = urlpathjoin(TEST_ENDPOINT, 'botgroups', TEST_BOTGROUP)
    requests.delete(botgroup_url, auth=hsproject.auth)
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,33 @@ | ||
""" | ||
Test Activity | ||
""" | ||
from .hstestcase import HSTestCase | ||
from six.moves import range | ||
|
||
|
||
class ActivityTest(HSTestCase): | ||
def test_post_and_reverse_get(hsproject): | ||
# make some sample data | ||
orig_data = [{u'foo': 42, u'counter': i} for i in range(20)] | ||
data1 = orig_data[:10] | ||
data2 = orig_data[10:] | ||
|
||
def test_post_and_reverse_get(self): | ||
# make some sample data | ||
orig_data = [{u'foo': 42, u'counter': i} for i in range(20)] | ||
data1 = orig_data[:10] | ||
data2 = orig_data[10:] | ||
# put ordered data in 2 separate posts | ||
hsproject.activity.post(data1) | ||
hsproject.activity.post(data2) | ||
|
||
# put ordered data in 2 separate posts | ||
self.project.activity.post(data1) | ||
self.project.activity.post(data2) | ||
# read them back in reverse chronological order | ||
result = list(hsproject.activity.list(count=20)) | ||
assert len(result) == 20 | ||
assert orig_data[::-1] == result | ||
|
||
# read them back in reverse chronological order | ||
result = list(self.project.activity.list(count=20)) | ||
self.assertEqual(len(result), 20) | ||
self.assertEqual(orig_data[::-1], result) | ||
|
||
def test_filters(self): | ||
self.project.activity.post({'c': i} for i in range(10)) | ||
r = list(self.project.activity.list(filter='["c", ">", [5]]', count=2)) | ||
self.assertEqual(r, [{'c': 9}, {'c': 8}]) | ||
def test_filters(hsproject):
    """Listing with a ``c > 5`` filter and count=2 yields the two newest."""
    activity = hsproject.activity
    activity.post({'c': i} for i in range(10))
    newest_two = list(activity.list(filter='["c", ">", [5]]', count=2))
    assert newest_two == [{'c': 9}, {'c': 8}]
|
||
def test_timestamp(self): | ||
self.project.activity.add({'foo': 'bar'}, baz='qux') | ||
entry = next(self.project.activity.list(count=1, meta='_ts')) | ||
self.assertTrue(entry.pop('_ts', None)) | ||
self.assertEqual(entry, {'foo': 'bar', 'baz': 'qux'}) | ||
|
||
def test_timestamp(hsproject):
    """An added entry comes back with a truthy ``_ts`` when meta asks for it."""
    activity = hsproject.activity
    activity.add({'foo': 'bar'}, baz='qux')
    latest = next(activity.list(count=1, meta='_ts'))
    # the timestamp must be present; it is popped so that the remaining
    # fields can be compared exactly
    assert latest.pop('_ts', None)
    assert latest == {'foo': 'bar', 'baz': 'qux'}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍