 Test Project
 """
 import time
+import pytest
 from six.moves import range
 from collections import defaultdict
-from .hstestcase import HSTestCase
 
 from scrapinghub.hubstorage import ValueTooLarge
+from .conftest import TEST_SPIDER_NAME, TEST_AUTH
 
 
-class BatchUploaderTest(HSTestCase):
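+# helper: push a test job, start it, and attach a batch writer to its items endpoint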
+def _job_and_writer(hsclient, hsproject, **writerargs):
+    job = hsproject.push_job(TEST_SPIDER_NAME)
+    hsproject.jobq.start(job)
+    batch_uploader = hsclient.batchuploader
+    writer = batch_uploader.create_writer(
+        job.items.url, auth=TEST_AUTH, **writerargs)
+    return job, writer
 
-    def _job_and_writer(self, **writerargs):
-        self.project.push_job(self.spidername)
-        job = self.start_job()
-        bu = self.hsclient.batchuploader
-        w = bu.create_writer(job.items.url, auth=self.auth, **writerargs)
-        return job, w
 
-    def test_writer_batchsize(self):
-        job, w = self._job_and_writer(size=10)
-        for x in range(111):
-            w.write({'x': x})
-        w.close()
-        # this works only for small batches (previous size=10 and small data)
-        # as internally HS may commit a single large request as many smaller
-        # commits, each with different timestamps
-        groups = defaultdict(int)
-        for doc in job.items.list(meta=['_ts']):
-            groups[doc['_ts']] += 1
-
-        self.assertEqual(len(groups), 12)
-
-    def test_writer_maxitemsize(self):
-        job, w = self._job_and_writer()
-        m = w.maxitemsize
-        self.assertRaisesRegexp(
-            ValueTooLarge,
-            'Value exceeds max encoded size of 1048576 bytes:'
-            ' \'{"b": "x+\\.\\.\\.\'',
-            w.write, {'b': 'x' * m})
-        self.assertRaisesRegexp(
-            ValueTooLarge,
-            'Value exceeds max encoded size of 1048576 bytes:'
-            ' \'{"b+\\.\\.\\.\'',
-            w.write, {'b'*m: 'x'})
-        self.assertRaisesRegexp(
-            ValueTooLarge,
-            'Value exceeds max encoded size of 1048576 bytes:'
-            ' \'{"b+\\.\\.\\.\'',
-            w.write, {'b'*(m//2): 'x'*(m//2)})
-
-    def test_writer_contentencoding(self):
-        for ce in ('identity', 'gzip'):
-            job, w = self._job_and_writer(content_encoding=ce)
-            for x in range(111):
-                w.write({'x': x})
-            w.close()
-            self.assertEqual(job.items.stats()['totals']['input_values'], 111)
-
-    def test_writer_interval(self):
-        job, w = self._job_and_writer(size=1000, interval=1)
+def test_writer_batchsize(hsclient, hsproject):
+    job, writer = _job_and_writer(hsclient, hsproject, size=10)
+    for x in range(111):
+        writer.write({'x': x})
+    writer.close()
+    # this check only works for small batches (size=10 above, small items),
+    # as internally HS may commit a single large request as many smaller
+    # commits, each with different timestamps
+    groups = defaultdict(int)
+    for doc in job.items.list(meta=['_ts']):
+        groups[doc['_ts']] += 1
+
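+    # ceil(111 / 10) == 12 batches, each committed with its own '_ts'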
+    assert len(groups) == 12
+
+
+def test_writer_maxitemsize(hsclient, hsproject):
+    _, writer = _job_and_writer(hsclient, hsproject)
+    max_size = writer.maxitemsize
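+    # maxitemsize caps a single encoded item at 1048576 bytes (1 MiB)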
+    with pytest.raises(ValueTooLarge) as excinfo1:
+        writer.write({'b': 'x' * max_size})
+    excinfo1.match(
+        r'Value exceeds max encoded size of 1048576 bytes:'
+        ' \'{"b": "x+\\.\\.\\.\'')
+
+    with pytest.raises(ValueTooLarge) as excinfo2:
+        writer.write({'b'*max_size: 'x'})
+    excinfo2.match(
+        r'Value exceeds max encoded size of 1048576 bytes:'
+        ' \'{"b+\\.\\.\\.\'')
+
+    with pytest.raises(ValueTooLarge) as excinfo3:
+        writer.write({'b'*(max_size//2): 'x'*(max_size//2)})
+    excinfo3.match(
+        r'Value exceeds max encoded size of 1048576 bytes:'
+        ' \'{"b+\\.\\.\\.\'')
+
+
+def test_writer_contentencoding(hsclient, hsproject):
+    for ce in ('identity', 'gzip'):
+        job, writer = _job_and_writer(hsclient, hsproject,
+                                      content_encoding=ce)
         for x in range(111):
-            w.write({'x': x})
-            if x == 50:
-                time.sleep(2)
+            writer.write({'x': x})
+        writer.close()
+        assert job.items.stats()['totals']['input_values'] == 111
+
+
+def test_writer_interval(hsclient, hsproject):
+    job, writer = _job_and_writer(hsclient, hsproject,
+                                  size=1000, interval=1)
+    for x in range(111):
+        writer.write({'x': x})
+        if x == 50:
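+            # pause longer than interval=1 so the periodic flush fires mid-run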
+            time.sleep(2)
 
-        w.close()
-        groups = defaultdict(int)
-        for doc in job.items.list(meta=['_ts']):
-            groups[doc['_ts']] += 1
+    writer.close()
+    groups = defaultdict(int)
+    for doc in job.items.list(meta=['_ts']):
+        groups[doc['_ts']] += 1
 
-        self.assertEqual(len(groups), 2)
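+    # one flush from the interval timer during the sleep, one from close()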
+    assert len(groups) == 2
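
The refactored tests rely on the `hsclient` and `hsproject` pytest fixtures and the `TEST_SPIDER_NAME` / `TEST_AUTH` constants imported from the package's conftest, which is not part of this diff. A minimal sketch of what those definitions might look like, assuming `scrapinghub.HubstorageClient` is the client in use; all names and values below are illustrative placeholders, not the project's actual conftest:

# conftest.py -- illustrative sketch only; the real fixtures are outside this diff
import pytest
from scrapinghub import HubstorageClient

TEST_PROJECT_ID = '2222222'          # assumed placeholder project id
TEST_SPIDER_NAME = 'hs-test-spider'  # assumed placeholder spider name
TEST_AUTH = 'f' * 32                 # assumed placeholder API key


@pytest.fixture(scope='session')
def hsclient():
    # one Hubstorage client per test session, closed on teardown
    client = HubstorageClient(auth=TEST_AUTH)
    yield client
    client.close()


@pytest.fixture(scope='session')
def hsproject(hsclient):
    # project handle whose push_job()/jobq the tests drive
    return hsclient.get_project(TEST_PROJECT_ID)

Session scope keeps a single authenticated client alive across all tests, which matches how the `_job_and_writer` helper reuses `hsclient.batchuploader` for every test function.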