Skip to content

Commit 0533192

Browse files
committed
Add tests for sh.hs.activity/batchuploader
1 parent 85c7d58 commit 0533192

File tree

2 files changed

+89
-82
lines changed

2 files changed

+89
-82
lines changed

tests/hubstorage/test_activity.py

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,33 @@
11
"""
22
Test Activity
33
"""
4-
from .hstestcase import HSTestCase
54
from six.moves import range
65

76

8-
class ActivityTest(HSTestCase):
7+
def test_post_and_reverse_get(hsproject):
8+
# make some sample data
9+
orig_data = [{u'foo': 42, u'counter': i} for i in range(20)]
10+
data1 = orig_data[:10]
11+
data2 = orig_data[10:]
912

10-
def test_post_and_reverse_get(self):
11-
# make some sample data
12-
orig_data = [{u'foo': 42, u'counter': i} for i in range(20)]
13-
data1 = orig_data[:10]
14-
data2 = orig_data[10:]
13+
# put ordered data in 2 separate posts
14+
hsproject.activity.post(data1)
15+
hsproject.activity.post(data2)
1516

16-
# put ordered data in 2 separate posts
17-
self.project.activity.post(data1)
18-
self.project.activity.post(data2)
17+
# read them back in reverse chronological order
18+
result = list(hsproject.activity.list(count=20))
19+
assert len(result) == 20
20+
assert orig_data[::-1] == result
1921

20-
# read them back in reverse chronological order
21-
result = list(self.project.activity.list(count=20))
22-
self.assertEqual(len(result), 20)
23-
self.assertEqual(orig_data[::-1], result)
2422

25-
def test_filters(self):
26-
self.project.activity.post({'c': i} for i in range(10))
27-
r = list(self.project.activity.list(filter='["c", ">", [5]]', count=2))
28-
self.assertEqual(r, [{'c': 9}, {'c': 8}])
23+
def test_filters(hsproject):
24+
hsproject.activity.post({'c': i} for i in range(10))
25+
r = list(hsproject.activity.list(filter='["c", ">", [5]]', count=2))
26+
assert r == [{'c': 9}, {'c': 8}]
2927

30-
def test_timestamp(self):
31-
self.project.activity.add({'foo': 'bar'}, baz='qux')
32-
entry = next(self.project.activity.list(count=1, meta='_ts'))
33-
self.assertTrue(entry.pop('_ts', None))
34-
self.assertEqual(entry, {'foo': 'bar', 'baz': 'qux'})
28+
29+
def test_timestamp(hsproject):
30+
hsproject.activity.add({'foo': 'bar'}, baz='qux')
31+
entry = next(hsproject.activity.list(count=1, meta='_ts'))
32+
assert entry.pop('_ts', None)
33+
assert entry == {'foo': 'bar', 'baz': 'qux'}

tests/hubstorage/test_batchuploader.py

Lines changed: 67 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -2,73 +2,81 @@
22
Test Batch Uploader
33
"""
44
import time
5+
import pytest
56
from six.moves import range
67
from collections import defaultdict
7-
from .hstestcase import HSTestCase
88

99
from scrapinghub.hubstorage import ValueTooLarge
10+
from .conftest import TEST_SPIDER_NAME, TEST_AUTH
1011

1112

12-
class BatchUploaderTest(HSTestCase):
13+
def _job_and_writer(hsclient, hsproject, **writerargs):
14+
job = hsproject.push_job(TEST_SPIDER_NAME)
15+
hsproject.jobq.start(job)
16+
batch_uploader = hsclient.batchuploader
17+
writer = batch_uploader.create_writer(
18+
job.items.url, auth=TEST_AUTH, **writerargs)
19+
return job, writer
1320

14-
def _job_and_writer(self, **writerargs):
15-
self.project.push_job(self.spidername)
16-
job = self.start_job()
17-
bu = self.hsclient.batchuploader
18-
w = bu.create_writer(job.items.url, auth=self.auth, **writerargs)
19-
return job, w
2021

21-
def test_writer_batchsize(self):
22-
job, w = self._job_and_writer(size=10)
23-
for x in range(111):
24-
w.write({'x': x})
25-
w.close()
26-
# this works only for small batches (previous size=10 and small data)
27-
# as internally HS may commit a single large request as many smaller
28-
# commits, each with different timestamps
29-
groups = defaultdict(int)
30-
for doc in job.items.list(meta=['_ts']):
31-
groups[doc['_ts']] += 1
32-
33-
self.assertEqual(len(groups), 12)
34-
35-
def test_writer_maxitemsize(self):
36-
job, w = self._job_and_writer()
37-
m = w.maxitemsize
38-
self.assertRaisesRegexp(
39-
ValueTooLarge,
40-
'Value exceeds max encoded size of 1048576 bytes:'
41-
' \'{"b": "x+\\.\\.\\.\'',
42-
w.write, {'b': 'x' * m})
43-
self.assertRaisesRegexp(
44-
ValueTooLarge,
45-
'Value exceeds max encoded size of 1048576 bytes:'
46-
' \'{"b+\\.\\.\\.\'',
47-
w.write, {'b'*m: 'x'})
48-
self.assertRaisesRegexp(
49-
ValueTooLarge,
50-
'Value exceeds max encoded size of 1048576 bytes:'
51-
' \'{"b+\\.\\.\\.\'',
52-
w.write, {'b'*(m//2): 'x'*(m//2)})
53-
54-
def test_writer_contentencoding(self):
55-
for ce in ('identity', 'gzip'):
56-
job, w = self._job_and_writer(content_encoding=ce)
57-
for x in range(111):
58-
w.write({'x': x})
59-
w.close()
60-
self.assertEqual(job.items.stats()['totals']['input_values'], 111)
61-
62-
def test_writer_interval(self):
63-
job, w = self._job_and_writer(size=1000, interval=1)
22+
def test_writer_batchsize(hsclient, hsproject):
23+
job, writer = _job_and_writer(hsclient, hsproject, size=10)
24+
for x in range(111):
25+
writer.write({'x': x})
26+
writer.close()
27+
# this works only for small batches (previous size=10 and small data)
28+
# as internally HS may commit a single large request as many smaller
29+
# commits, each with different timestamps
30+
groups = defaultdict(int)
31+
for doc in job.items.list(meta=['_ts']):
32+
groups[doc['_ts']] += 1
33+
34+
assert len(groups) == 12
35+
36+
37+
def test_writer_maxitemsize(hsclient, hsproject):
38+
_, writer = _job_and_writer(hsclient, hsproject)
39+
max_size = writer.maxitemsize
40+
with pytest.raises(ValueTooLarge) as excinfo1:
41+
writer.write({'b': 'x' * max_size})
42+
excinfo1.match(
43+
r'Value exceeds max encoded size of 1048576 bytes:'
44+
' \'{"b": "x+\\.\\.\\.\'')
45+
46+
with pytest.raises(ValueTooLarge) as excinfo2:
47+
writer.write({'b'*max_size: 'x'})
48+
excinfo2.match(
49+
r'Value exceeds max encoded size of 1048576 bytes:'
50+
' \'{"b+\\.\\.\\.\'')
51+
52+
with pytest.raises(ValueTooLarge) as excinfo3:
53+
writer.write({'b'*(max_size//2): 'x'*(max_size//2)})
54+
excinfo3.match(
55+
r'Value exceeds max encoded size of 1048576 bytes:'
56+
' \'{"b+\\.\\.\\.\'')
57+
58+
59+
def test_writer_contentencoding(hsclient, hsproject):
60+
for ce in ('identity', 'gzip'):
61+
job, writer = _job_and_writer(hsclient, hsproject,
62+
content_encoding=ce)
6463
for x in range(111):
65-
w.write({'x': x})
66-
if x == 50:
67-
time.sleep(2)
64+
writer.write({'x': x})
65+
writer.close()
66+
assert job.items.stats()['totals']['input_values'] == 111
67+
68+
69+
def test_writer_interval(hsclient, hsproject):
70+
job, writer = _job_and_writer(hsclient, hsproject,
71+
size=1000, interval=1)
72+
for x in range(111):
73+
writer.write({'x': x})
74+
if x == 50:
75+
time.sleep(2)
6876

69-
w.close()
70-
groups = defaultdict(int)
71-
for doc in job.items.list(meta=['_ts']):
72-
groups[doc['_ts']] += 1
77+
writer.close()
78+
groups = defaultdict(int)
79+
for doc in job.items.list(meta=['_ts']):
80+
groups[doc['_ts']] += 1
7381

74-
self.assertEqual(len(groups), 2)
82+
assert len(groups) == 2

0 commit comments

Comments
 (0)